assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2012 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \172\ab       - the register number from operand a in bits 7..4, with
  70  *                 the 4-bit immediate from operand b in bits 3..0.
  71  * \173\xab      - the register number from operand a in bits 7..4, with
  72  *                 the value b in bits 3..0.
  73  * \174..\177    - the register number from operand 0..3 in bits 7..4, and
  74  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  75  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  76  *                 field equal to digit b.
  77  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  78  *                 is not equal to the truncated and sign-extended 32-bit
  79  *                 operand; used for 32-bit immediates in 64-bit mode.
  80  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  81  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  82  *                 V field taken from operand 0..3.
  83  * \270          - this instruction uses VEX/XOP rather than REX, with the
  84  *                 V field set to 1111b.
  85  *
  86  * VEX/XOP prefixes are followed by the sequence:
  87  * \tmm\wlp        where mm is the M field; and wlp is:
  88  *                 00 wwl lpp
  89  *                 [l0]  ll = 0 for L = 0 (.128, .lz)
  90  *                 [l1]  ll = 1 for L = 1 (.256)
  91  *                 [lig] ll = 2 for L don't care (always assembled as 0)
  92  *
  93  *                 [w0]  ww = 0 for W = 0
  94  *                 [w1 ] ww = 1 for W = 1
  95  *                 [wig] ww = 2 for W don't care (always assembled as 0)
  96  *                 [ww]  ww = 3 for W used as REX.W
  97  *
  98  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
  99  *
 100  * \271          - instruction takes XRELEASE (F3) with or without lock
 101  * \272          - instruction takes XACQUIRE/XRELEASE with or without lock
 102  * \273          - instruction takes XACQUIRE/XRELEASE with lock only
 103  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 104  *                 which is to be extended to the operand size.
 105  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 106  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 107  * \312          - (disassembler only) invalid with non-default address size.
 108  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 109  * \314          - (disassembler only) invalid with REX.B
 110  * \315          - (disassembler only) invalid with REX.X
 111  * \316          - (disassembler only) invalid with REX.R
 112  * \317          - (disassembler only) invalid with REX.W
 113  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 114  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 115  * \322          - indicates that this instruction is only valid when the
 116  *                 operand size is the default (instruction to disassembler,
 117  *                 generates no code in the assembler)
 118  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 119  * \324          - indicates 64-bit operand size requiring REX prefix.
 120  * \325          - instruction which always uses spl/bpl/sil/dil
 121  * \330          - a literal byte follows in the code stream, to be added
 122  *                 to the condition code value of the instruction.
 123  * \331          - instruction not valid with REP prefix.  Hint for
 124  *                 disassembler only; for SSE instructions.
 125  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 126  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 127  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 128  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
 129  * \336          - force a REP(E) prefix (0xF3) even if not specified.
 130  * \337          - force a REPNE prefix (0xF2) even if not specified.
 131  *                 \336-\337 are still listed as prefixes in the disassembler.
 132  * \340          - reserve <operand 0> bytes of uninitialized storage.
 133  *                 Operand 0 had better be a segmentless constant.
 134  * \341          - this instruction needs a WAIT "prefix"
 135  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 136  *                 (POP is never used for CS) depending on operand 0
 137  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 138  *                 on operand 0
 139  * \360          - no SSE prefix (== \364\331)
 140  * \361          - 66 SSE prefix (== \366\331)
 141  * \362          - F2 SSE prefix (== \364\332)
 142  * \363          - F3 SSE prefix (== \364\333)
 143  * \364          - operand-size prefix (0x66) not permitted
 144  * \365          - address-size prefix (0x67) not permitted
 145  * \366          - operand-size prefix (0x66) used as opcode extension
 146  * \367          - address-size prefix (0x67) used as opcode extension
 147  * \370,\371     - match only if operand 0 meets byte jump criteria.
 148  *                 370 is used for Jcc, 371 is used for JMP.
 149  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 150  *                 used for conditional jump over longer jump
 151  * \374          - this instruction takes an XMM VSIB memory EA
  152  * \375          - this instruction takes a YMM VSIB memory EA
 153  */
 154
 155 #include "compiler.h"
 156
 157 #include <stdio.h>
 158 #include <string.h>
 159 #include <inttypes.h>
 160
 161 #include "nasm.h"
 162 #include "nasmlib.h"
 163 #include "assemble.h"
 164 #include "insns.h"
 165 #include "tables.h"
 166
/*
 * Result of trying to match an instruction against a template.
 * Everything below MOK_JUMP is a failure; assemble() turns the
 * failure codes into the diagnostics quoted below.
 */
enum match_result {
    /*
     * Matching errors.  These should be sorted so that more specific
     * errors come later in the sequence.
     */
    MERR_INVALOP,           /* "invalid combination of opcode and operands" */
    MERR_OPSIZEMISSING,     /* "operation size not specified" */
    MERR_OPSIZEMISMATCH,    /* "mismatch in operand sizes" */
    MERR_BADCPU,            /* "no instruction for this cpu level" */
    MERR_BADMODE,           /* "instruction not supported in %d-bit mode" */
    MERR_BADHLE,            /* no dedicated message in assemble(); reported like MERR_INVALOP */
    /*
     * Matching success; the conditional ones first
     */
    MOK_JUMP,   /* Matching OK but needs jmp_match() */
    MOK_GOOD    /* Matching unconditionally OK */
};
 184
/*
 * Description of an encoded effective address (EA); this is the
 * structure handed to process_ea() to be filled in.
 */
typedef struct {
    enum ea_type type;            /* what kind of EA is this? */
    int sib_present;              /* is a SIB byte necessary? */
    int bytes;                    /* # of bytes of offset needed */
    int size;                     /* lazy - this is sib+bytes+1 */
    uint8_t modrm, sib, rex, rip; /* the bytes themselves */
} ea;
 192
/*
 * Build a SIB byte: scale goes to bits 7..6, index to bits 5..3 and
 * base to bits 2..0.  None of the arguments is masked, so the caller
 * must pass values that already fit their fields.
 */
#define GEN_SIB(scale, index, base)                 \
        (((scale) << 6) | ((index) << 3) | ((base)))

/*
 * Build a ModRM byte: mod goes to bits 7..6 (not masked), reg to
 * bits 5..3 and rm to bits 2..0; reg and rm are reduced to their
 * low three bits first.
 */
#define GEN_MODRM(mod, reg, rm)                     \
        (((mod) << 6) | (((reg) & 7) << 3) | ((rm) & 7))
 198
/*
 * File-scope state, reloaded from the caller's arguments on entry:
 * assemble() sets all four, insn_size() sets only cpu and errfunc.
 */
static uint32_t cpu;            /* cpu level received from nasm.c */
static efunc errfunc;           /* error reporting callback of the current call */
static struct ofmt *outfmt;     /* output format that out() writes through */
static ListGen *list;           /* listing generator that out() also feeds */
 203
/*
 * Forward declarations of file-local (static) helpers, so that they
 * can be called before the point where they are defined.
 */
static int64_t calcsize(int32_t, int64_t, int, insn *,
                        const struct itemplate *);
static void gencode(int32_t segment, int64_t offset, int bits,
                    insn * ins, const struct itemplate *temp,
                    int64_t insn_end);
static enum match_result find_match(const struct itemplate **tempp,
                                    insn *instruction,
                                    int32_t segment, int64_t offset, int bits);
static enum match_result matches(const struct itemplate *, insn *, int bits);
static opflags_t regflag(const operand *);
static int32_t regval(const operand *);
static int rexflags(int, opflags_t, int);
static int op_rexflags(const operand *, int);
static void add_asp(insn *, int);

static enum ea_type process_ea(operand *, ea *, int, int, int, opflags_t);
 220
 221 static int has_prefix(insn * ins, enum prefix_pos pos, int prefix)
 222 {
 223     return ins->prefixes[pos] == prefix;
 224 }
 225
 226 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 227 {
 228     if (ins->prefixes[pos])
 229         errfunc(ERR_NONFATAL, "invalid %s prefix",
 230                 prefix_name(ins->prefixes[pos]));
 231 }
 232
 233 static const char *size_name(int size)
 234 {
 235     switch (size) {
 236     case 1:
 237         return "byte";
 238     case 2:
 239         return "word";
 240     case 4:
 241         return "dword";
 242     case 8:
 243         return "qword";
 244     case 10:
 245         return "tword";
 246     case 16:
 247         return "oword";
 248     case 32:
 249         return "yword";
 250     default:
 251         return "???";
 252     }
 253 }
 254
 255 static void warn_overflow(int pass, int size)
 256 {
 257     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 258             "%s data exceeds bounds", size_name(size));
 259 }
 260
 261 static void warn_overflow_const(int64_t data, int size)
 262 {
 263     if (overflow_general(data, size))
 264         warn_overflow(ERR_PASS1, size);
 265 }
 266
 267 static void warn_overflow_opd(const struct operand *o, int size)
 268 {
 269     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 270         if (overflow_general(o->offset, size))
 271             warn_overflow(ERR_PASS2, size);
 272     }
 273 }
 274
 275 /*
 276  * This routine wrappers the real output format's output routine,
 277  * in order to pass a copy of the data off to the listing file
 278  * generator at the same time.
 279  */
 280 static void out(int64_t offset, int32_t segto, const void *data,
 281                 enum out_type type, uint64_t size,
 282                 int32_t segment, int32_t wrt)
 283 {
 284     static int32_t lineno = 0;     /* static!!! */
 285     static char *lnfname = NULL;
 286     uint8_t p[8];
 287
 288     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 289         /*
 290          * This is a non-relocated address, and we're going to
 291          * convert it into RAWDATA format.
 292          */
 293         uint8_t *q = p;
 294
 295         if (size > 8) {
 296             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 297             return;
 298         }
 299
 300         WRITEADDR(q, *(int64_t *)data, size);
 301         data = p;
 302         type = OUT_RAWDATA;
 303     }
 304
 305     list->output(offset, data, type, size);
 306
 307     /*
 308      * this call to src_get determines when we call the
 309      * debug-format-specific "linenum" function
 310      * it updates lineno and lnfname to the current values
 311      * returning 0 if "same as last time", -2 if lnfname
 312      * changed, and the amount by which lineno changed,
 313      * if it did. thus, these variables must be static
 314      */
 315
 316     if (src_get(&lineno, &lnfname))
 317         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 318
 319     outfmt->output(segto, data, type, size, segment, wrt);
 320 }
 321
 322 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 323                       insn * ins, const struct itemplate *temp)
 324 {
 325     int64_t isize;
 326     const uint8_t *code = temp->code;
 327     uint8_t c = code[0];
 328
 329     if (((c & ~1) != 0370) || (ins->oprs[0].type & STRICT))
 330         return false;
 331     if (!optimizing)
 332         return false;
 333     if (optimizing < 0 && c == 0371)
 334         return false;
 335
 336     isize = calcsize(segment, offset, bits, ins, temp);
 337
 338     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 339         /* Be optimistic in pass 1 */
 340         return true;
 341
 342     if (ins->oprs[0].segment != segment)
 343         return false;
 344
 345     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 346     return (isize >= -128 && isize <= 127); /* is it byte size? */
 347 }
 348
 349 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 350                  insn * instruction, struct ofmt *output, efunc error,
 351                  ListGen * listgen)
 352 {
 353     const struct itemplate *temp;
 354     int j;
 355     enum match_result m;
 356     int64_t insn_end;
 357     int32_t itimes;
 358     int64_t start = offset;
 359     int64_t wsize;              /* size for DB etc. */
 360
 361     errfunc = error;            /* to pass to other functions */
 362     cpu = cp;
 363     outfmt = output;            /* likewise */
 364     list = listgen;             /* and again */
 365
 366     wsize = idata_bytes(instruction->opcode);
 367     if (wsize == -1)
 368         return 0;
 369
 370     if (wsize) {
 371         extop *e;
 372         int32_t t = instruction->times;
 373         if (t < 0)
 374             errfunc(ERR_PANIC,
 375                     "instruction->times < 0 (%ld) in assemble()", t);
 376
 377         while (t--) {           /* repeat TIMES times */
 378             list_for_each(e, instruction->eops) {
 379                 if (e->type == EOT_DB_NUMBER) {
 380                     if (wsize > 8) {
 381                         errfunc(ERR_NONFATAL,
 382                                 "integer supplied to a DT, DO or DY"
 383                                 " instruction");
 384                     } else {
 385                         out(offset, segment, &e->offset,
 386                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 387                         offset += wsize;
 388                     }
 389                 } else if (e->type == EOT_DB_STRING ||
 390                            e->type == EOT_DB_STRING_FREE) {
 391                     int align;
 392
 393                     out(offset, segment, e->stringval,
 394                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 395                     align = e->stringlen % wsize;
 396
 397                     if (align) {
 398                         align = wsize - align;
 399                         out(offset, segment, zero_buffer,
 400                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 401                     }
 402                     offset += e->stringlen + align;
 403                 }
 404             }
 405             if (t > 0 && t == instruction->times - 1) {
 406                 /*
 407                  * Dummy call to list->output to give the offset to the
 408                  * listing module.
 409                  */
 410                 list->output(offset, NULL, OUT_RAWDATA, 0);
 411                 list->uplevel(LIST_TIMES);
 412             }
 413         }
 414         if (instruction->times > 1)
 415             list->downlevel(LIST_TIMES);
 416         return offset - start;
 417     }
 418
 419     if (instruction->opcode == I_INCBIN) {
 420         const char *fname = instruction->eops->stringval;
 421         FILE *fp;
 422
 423         fp = fopen(fname, "rb");
 424         if (!fp) {
 425             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 426                   fname);
 427         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 428             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 429                   fname);
 430         } else {
 431             static char buf[4096];
 432             size_t t = instruction->times;
 433             size_t base = 0;
 434             size_t len;
 435
 436             len = ftell(fp);
 437             if (instruction->eops->next) {
 438                 base = instruction->eops->next->offset;
 439                 len -= base;
 440                 if (instruction->eops->next->next &&
 441                     len > (size_t)instruction->eops->next->next->offset)
 442                     len = (size_t)instruction->eops->next->next->offset;
 443             }
 444             /*
 445              * Dummy call to list->output to give the offset to the
 446              * listing module.
 447              */
 448             list->output(offset, NULL, OUT_RAWDATA, 0);
 449             list->uplevel(LIST_INCBIN);
 450             while (t--) {
 451                 size_t l;
 452
 453                 fseek(fp, base, SEEK_SET);
 454                 l = len;
 455                 while (l > 0) {
 456                     int32_t m;
 457                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 458                     if (!m) {
 459                         /*
 460                          * This shouldn't happen unless the file
 461                          * actually changes while we are reading
 462                          * it.
 463                          */
 464                         error(ERR_NONFATAL,
 465                               "`incbin': unexpected EOF while"
 466                               " reading file `%s'", fname);
 467                         t = 0;  /* Try to exit cleanly */
 468                         break;
 469                     }
 470                     out(offset, segment, buf, OUT_RAWDATA, m,
 471                         NO_SEG, NO_SEG);
 472                     l -= m;
 473                 }
 474             }
 475             list->downlevel(LIST_INCBIN);
 476             if (instruction->times > 1) {
 477                 /*
 478                  * Dummy call to list->output to give the offset to the
 479                  * listing module.
 480                  */
 481                 list->output(offset, NULL, OUT_RAWDATA, 0);
 482                 list->uplevel(LIST_TIMES);
 483                 list->downlevel(LIST_TIMES);
 484             }
 485             fclose(fp);
 486             return instruction->times * len;
 487         }
 488         return 0;               /* if we're here, there's an error */
 489     }
 490
 491     /* Check to see if we need an address-size prefix */
 492     add_asp(instruction, bits);
 493
 494     m = find_match(&temp, instruction, segment, offset, bits);
 495
 496     if (m == MOK_GOOD) {
 497         /* Matches! */
 498         int64_t insn_size = calcsize(segment, offset, bits, instruction, temp);
 499         itimes = instruction->times;
 500         if (insn_size < 0)  /* shouldn't be, on pass two */
 501             error(ERR_PANIC, "errors made it through from pass one");
 502         else
 503             while (itimes--) {
 504                 for (j = 0; j < MAXPREFIX; j++) {
 505                     uint8_t c = 0;
 506                     switch (instruction->prefixes[j]) {
 507                     case P_WAIT:
 508                         c = 0x9B;
 509                         break;
 510                     case P_LOCK:
 511                         c = 0xF0;
 512                         break;
 513                     case P_REPNE:
 514                     case P_REPNZ:
 515                     case P_XACQUIRE:
 516                         c = 0xF2;
 517                         break;
 518                     case P_REPE:
 519                     case P_REPZ:
 520                     case P_REP:
 521                     case P_XRELEASE:
 522                         c = 0xF3;
 523                         break;
 524                     case R_CS:
 525                         if (bits == 64) {
 526                             error(ERR_WARNING | ERR_PASS2,
 527                                   "cs segment base generated, but will be ignored in 64-bit mode");
 528                         }
 529                         c = 0x2E;
 530                         break;
 531                     case R_DS:
 532                         if (bits == 64) {
 533                             error(ERR_WARNING | ERR_PASS2,
 534                                   "ds segment base generated, but will be ignored in 64-bit mode");
 535                         }
 536                         c = 0x3E;
 537                         break;
 538                     case R_ES:
 539                         if (bits == 64) {
 540                             error(ERR_WARNING | ERR_PASS2,
 541                                   "es segment base generated, but will be ignored in 64-bit mode");
 542                         }
 543                         c = 0x26;
 544                         break;
 545                     case R_FS:
 546                         c = 0x64;
 547                         break;
 548                     case R_GS:
 549                         c = 0x65;
 550                         break;
 551                     case R_SS:
 552                         if (bits == 64) {
 553                             error(ERR_WARNING | ERR_PASS2,
 554                                   "ss segment base generated, but will be ignored in 64-bit mode");
 555                         }
 556                         c = 0x36;
 557                         break;
 558                     case R_SEGR6:
 559                     case R_SEGR7:
 560                         error(ERR_NONFATAL,
 561                               "segr6 and segr7 cannot be used as prefixes");
 562                         break;
 563                     case P_A16:
 564                         if (bits == 64) {
 565                             error(ERR_NONFATAL,
 566                                   "16-bit addressing is not supported "
 567                                   "in 64-bit mode");
 568                         } else if (bits != 16)
 569                             c = 0x67;
 570                         break;
 571                     case P_A32:
 572                         if (bits != 32)
 573                             c = 0x67;
 574                         break;
 575                     case P_A64:
 576                         if (bits != 64) {
 577                             error(ERR_NONFATAL,
 578                                   "64-bit addressing is only supported "
 579                                   "in 64-bit mode");
 580                         }
 581                         break;
 582                     case P_ASP:
 583                         c = 0x67;
 584                         break;
 585                     case P_O16:
 586                         if (bits != 16)
 587                             c = 0x66;
 588                         break;
 589                     case P_O32:
 590                         if (bits == 16)
 591                             c = 0x66;
 592                         break;
 593                     case P_O64:
 594                         /* REX.W */
 595                         break;
 596                     case P_OSP:
 597                         c = 0x66;
 598                         break;
 599                     case P_none:
 600                         break;
 601                     default:
 602                         error(ERR_PANIC, "invalid instruction prefix");
 603                     }
 604                     if (c != 0) {
 605                         out(offset, segment, &c, OUT_RAWDATA, 1,
 606                             NO_SEG, NO_SEG);
 607                         offset++;
 608                     }
 609                 }
 610                 insn_end = offset + insn_size;
 611                 gencode(segment, offset, bits, instruction,
 612                         temp, insn_end);
 613                 offset += insn_size;
 614                 if (itimes > 0 && itimes == instruction->times - 1) {
 615                     /*
 616                      * Dummy call to list->output to give the offset to the
 617                      * listing module.
 618                      */
 619                     list->output(offset, NULL, OUT_RAWDATA, 0);
 620                     list->uplevel(LIST_TIMES);
 621                 }
 622             }
 623         if (instruction->times > 1)
 624             list->downlevel(LIST_TIMES);
 625         return offset - start;
 626     } else {
 627         /* No match */
 628         switch (m) {
 629         case MERR_OPSIZEMISSING:
 630             error(ERR_NONFATAL, "operation size not specified");
 631             break;
 632         case MERR_OPSIZEMISMATCH:
 633             error(ERR_NONFATAL, "mismatch in operand sizes");
 634             break;
 635         case MERR_BADCPU:
 636             error(ERR_NONFATAL, "no instruction for this cpu level");
 637             break;
 638         case MERR_BADMODE:
 639             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 640                   bits);
 641             break;
 642         default:
 643             error(ERR_NONFATAL,
 644                   "invalid combination of opcode and operands");
 645             break;
 646         }
 647     }
 648     return 0;
 649 }
 650
 651 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 652                   insn * instruction, efunc error)
 653 {
 654     const struct itemplate *temp;
 655     enum match_result m;
 656
 657     errfunc = error;            /* to pass to other functions */
 658     cpu = cp;
 659
 660     if (instruction->opcode == I_none)
 661         return 0;
 662
 663     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 664         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 665         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 666         instruction->opcode == I_DY) {
 667         extop *e;
 668         int32_t isize, osize, wsize;
 669
 670         isize = 0;
 671         wsize = idata_bytes(instruction->opcode);
 672
 673         list_for_each(e, instruction->eops) {
 674             int32_t align;
 675
 676             osize = 0;
 677             if (e->type == EOT_DB_NUMBER) {
 678                 osize = 1;
 679                 warn_overflow_const(e->offset, wsize);
 680             } else if (e->type == EOT_DB_STRING ||
 681                        e->type == EOT_DB_STRING_FREE)
 682                 osize = e->stringlen;
 683
 684             align = (-osize) % wsize;
 685             if (align < 0)
 686                 align += wsize;
 687             isize += osize + align;
 688         }
 689         return isize * instruction->times;
 690     }
 691
 692     if (instruction->opcode == I_INCBIN) {
 693         const char *fname = instruction->eops->stringval;
 694         FILE *fp;
 695         int64_t val = 0;
 696         size_t len;
 697
 698         fp = fopen(fname, "rb");
 699         if (!fp)
 700             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 701                   fname);
 702         else if (fseek(fp, 0L, SEEK_END) < 0)
 703             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 704                   fname);
 705         else {
 706             len = ftell(fp);
 707             if (instruction->eops->next) {
 708                 len -= instruction->eops->next->offset;
 709                 if (instruction->eops->next->next &&
 710                     len > (size_t)instruction->eops->next->next->offset) {
 711                     len = (size_t)instruction->eops->next->next->offset;
 712                 }
 713             }
 714             val = instruction->times * len;
 715         }
 716         if (fp)
 717             fclose(fp);
 718         return val;
 719     }
 720
 721     /* Check to see if we need an address-size prefix */
 722     add_asp(instruction, bits);
 723
 724     m = find_match(&temp, instruction, segment, offset, bits);
 725     if (m == MOK_GOOD) {
 726         /* we've matched an instruction. */
 727         int64_t isize;
 728         int j;
 729
 730         isize = calcsize(segment, offset, bits, instruction, temp);
 731         if (isize < 0)
 732             return -1;
 733         for (j = 0; j < MAXPREFIX; j++) {
 734             switch (instruction->prefixes[j]) {
 735             case P_A16:
 736                 if (bits != 16)
 737                     isize++;
 738                 break;
 739             case P_A32:
 740                 if (bits != 32)
 741                     isize++;
 742                 break;
 743             case P_O16:
 744                 if (bits != 16)
 745                     isize++;
 746                 break;
 747             case P_O32:
 748                 if (bits == 16)
 749                     isize++;
 750                 break;
 751             case P_A64:
 752             case P_O64:
 753             case P_none:
 754                 break;
 755             default:
 756                 isize++;
 757                 break;
 758             }
 759         }
 760         return isize * instruction->times;
 761     } else {
 762         return -1;                  /* didn't match any instruction */
 763     }
 764 }
 765
 766 static bool possible_sbyte(operand *o)
 767 {
 768     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 769         !(o->opflags & OPFLAG_UNKNOWN) &&
 770         optimizing >= 0 && !(o->type & STRICT);
 771 }
 772
 773 /* check that opn[op]  is a signed byte of size 16 or 32 */
 774 static bool is_sbyte16(operand *o)
 775 {
 776     int16_t v;
 777
 778     if (!possible_sbyte(o))
 779         return false;
 780
 781     v = o->offset;
 782     return v >= -128 && v <= 127;
 783 }
 784
 785 static bool is_sbyte32(operand *o)
 786 {
 787     int32_t v;
 788
 789     if (!possible_sbyte(o))
 790         return false;
 791
 792     v = o->offset;
 793     return v >= -128 && v <= 127;
 794 }
 795
 796 static void bad_hle_warn(const insn * ins, uint8_t hleok)
 797 {
 798     enum prefixes rep_pfx = ins->prefixes[PPS_REP];
 799     enum whatwarn { w_none, w_lock, w_inval } ww;
 800     static const enum whatwarn warn[2][4] =
 801     {
 802         { w_inval, w_inval, w_none, w_lock }, /* XACQUIRE */
 803         { w_inval, w_none,  w_none, w_lock }, /* XRELEASE */
 804     };
 805     unsigned int n;
 806
 807     n = (unsigned int)rep_pfx - P_XACQUIRE;
 808     if (n > 1)
 809         return;                 /* Not XACQUIRE/XRELEASE */
 810
 811     ww = warn[n][hleok];
 812     if (!is_class(MEMORY, ins->oprs[0].type))
 813         ww = w_inval;           /* HLE requires operand 0 to be memory */
 814
 815     switch (ww) {
 816     case w_none:
 817         break;
 818
 819     case w_lock:
 820         if (ins->prefixes[PPS_LOCK] != P_LOCK) {
 821             errfunc(ERR_WARNING | ERR_WARN_HLE | ERR_PASS2,
 822                     "%s with this instruction requires lock",
 823                     prefix_name(rep_pfx));
 824         }
 825         break;
 826
 827     case w_inval:
 828         errfunc(ERR_WARNING | ERR_WARN_HLE | ERR_PASS2,
 829                 "%s invalid with this instruction",
 830                 prefix_name(rep_pfx));
 831         break;
 832     }
 833 }
 834
/*
 * Common construct: expands to four consecutive case labels, x through
 * x+3.  Written as `case4(010):' inside a switch; the colon after the
 * last label is supplied at the point of use.
 */
#define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 837
/*
 * Compute the encoded length of an instruction for a given template by
 * interpreting the template's byte-code string (temp->code), which is
 * terminated by a zero byte.
 *
 * segment, offset - unused here (explicitly voided below)
 * bits            - current mode, compared against 16, 32 and 64
 * ins             - the instruction; this function also updates it:
 *                   ins->rex is reset and rebuilt, ins->vexreg, ins->vex_cm
 *                   and ins->vex_wlp are filled in for VEX templates, and
 *                   the PPS_OSIZE, PPS_REP and PPS_WAIT prefix slots may be
 *                   given default values (without counting them in the
 *                   returned length)
 * temp            - the instruction template whose byte codes are walked
 *
 * Returns the accumulated length, or -1 when the template cannot be used
 * (most, but not all, of those paths report an error through errfunc()
 * first).  Lock and HLE prefix sanity warnings are issued before returning.
 */
static int64_t calcsize(int32_t segment, int64_t offset, int bits,
                        insn * ins, const struct itemplate *temp)
{
    const uint8_t *codes = temp->code;
    int64_t length = 0;
    uint8_t c;
    int rex_mask = ~0;          /* REX bits allowed to survive; see code 0323 */
    int op1, op2;
    struct operand *opx;
    uint8_t opex = 0;           /* Operand-index extension set by codes 05..07 */
    enum ea_type eat;
    uint8_t hleok = 0;          /* Column passed to bad_hle_warn() */
    bool lockcheck = true;

    ins->rex = 0;               /* Ensure REX is reset */
    eat = EA_SCALAR;            /* Expect a scalar EA */

    if (ins->prefixes[PPS_OSIZE] == P_O64)
        ins->rex |= REX_W;

    (void)segment;              /* Don't warn that this parameter is unused */
    (void)offset;               /* Don't warn that this parameter is unused */

    while (*codes) {
        c = *codes++;
        /*
         * The low two bits of the code select operand op1 and bits 3..4
         * select operand op2; a preceding 05..07 code adds 4 to either.
         */
        op1 = (c & 3) + ((opex & 1) << 2);
        op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
        opx = &ins->oprs[op1];
        opex = 0;               /* For the next iteration */

        switch (c) {
        /* c literal bytes follow in the code stream: skip and count them */
        case 01:
        case 02:
        case 03:
        case 04:
            codes += c, length += c;
            break;

        /* Operand-index extension for the next code; contributes no bytes */
        case 05:
        case 06:
        case 07:
            opex = c;
            break;

        /* One code byte follows and one byte is counted; gather REX flags */
        case4(010):
            ins->rex |=
                op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
            codes++, length++;
            break;

        /* One-byte operand values */
        case4(014):
        case4(020):
        case4(024):
            length++;
            break;

        case4(030):
            length += 2;
            break;

        /*
         * Two or four bytes: decided by the operand's own size flag if it
         * has one, otherwise by the current mode.
         * NOTE(review): gencode()'s case4(034) tests only BITS16|BITS32
         * when choosing the size, whereas this test also includes BITS64 --
         * confirm the two can never disagree.
         */
        case4(034):
            if (opx->type & (BITS16 | BITS32 | BITS64))
                length += (opx->type & BITS16) ? 2 : 4;
            else
                length += (bits == 16) ? 2 : 4;
            break;

        case4(040):
            length += 4;
            break;

        /* Size follows the address size (addr_size is in bits) */
        case4(044):
            length += ins->addr_size >> 3;
            break;

        case4(050):
            length++;
            break;

        case4(054):
            length += 8; /* MOV reg64/imm */
            break;

        case4(060):
            length += 2;
            break;

        /* Two or four bytes, chosen the same way as for code 034 above */
        case4(064):
            if (opx->type & (BITS16 | BITS32 | BITS64))
                length += (opx->type & BITS16) ? 2 : 4;
            else
                length += (bits == 16) ? 2 : 4;
            break;

        case4(070):
            length += 4;
            break;

        case4(074):
            length += 2;
            break;

        /* One byte if the value fits a signed byte, otherwise two */
        case4(0140):
            length += is_sbyte16(opx) ? 1 : 2;
            break;

        /* One code byte follows; one byte is counted */
        case4(0144):
            codes++;
            length++;
            break;

        /* One byte if the value fits a signed byte, otherwise four */
        case4(0150):
            length += is_sbyte32(opx) ? 1 : 4;
            break;

        /* One code byte follows; one byte is counted */
        case4(0154):
            codes++;
            length++;
            break;

        /* One code byte follows; one byte is counted */
        case 0172:
        case 0173:
            codes++;
            length++;
            break;

        case4(0174):
            length++;
            break;

        /* One byte if the value fits a signed byte, otherwise four */
        case4(0250):
            length += is_sbyte32(opx) ? 1 : 4;
            break;

        case4(0254):
            length += 4;
            break;

        /*
         * VEX-encoded template: remember the register value of the operand
         * and the two parameter bytes that follow.  The prefix length
         * itself is added after the loop, once all REX bits are known.
         */
        case4(0260):
            ins->rex |= REX_V;
            ins->vexreg = regval(opx);
            ins->vex_cm = *codes++;
            ins->vex_wlp = *codes++;
            break;

        /* As above, but with the register field fixed at 0 */
        case 0270:
            ins->rex |= REX_V;
            ins->vexreg = 0;
            ins->vex_cm = *codes++;
            ins->vex_wlp = *codes++;
            break;

        /* Record which HLE prefixes are acceptable; checked at the end */
        case 0271:
        case 0272:
        case 0273:
            hleok = c & 3;
            break;

        case4(0274):
            length++;
            break;

        case4(0300):
            break;

        /*
         * Not usable in 64-bit mode.  Otherwise one byte is counted unless
         * the mode is already 16-bit or an A16 prefix is present.
         */
        case 0310:
            if (bits == 64)
                return -1;
            length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
            break;

        /* One byte unless the mode is 32-bit or an A32 prefix is present */
        case 0311:
            length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
            break;

        case 0312:
            break;

        /* Only usable in 64-bit mode with no A16/A32 prefix */
        case 0313:
            if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
                has_prefix(ins, PPS_ASIZE, P_A32))
                return -1;
            break;

        case4(0314):
            break;

        /*
         * Default the operand-size prefix slot to O16 when it is empty;
         * warn if a different operand-size prefix is already there.
         */
        case 0320:
        {
            enum prefixes pfx = ins->prefixes[PPS_OSIZE];
            if (pfx == P_O16)
                break;
            if (pfx != P_none)
                errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
            else
                ins->prefixes[PPS_OSIZE] = P_O16;
            break;
        }

        /* Same as 0320, but for O32 */
        case 0321:
        {
            enum prefixes pfx = ins->prefixes[PPS_OSIZE];
            if (pfx == P_O32)
                break;
            if (pfx != P_none)
                errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
            else
                ins->prefixes[PPS_OSIZE] = P_O32;
            break;
        }

        case 0322:
            break;

        /* REX.W will be masked out of ins->rex after the loop */
        case 0323:
            rex_mask &= ~REX_W;
            break;

        case 0324:
            ins->rex |= REX_W;
            break;

        /* Flag checked after the loop: high registers are rejected */
        case 0325:
            ins->rex |= REX_NH;
            break;

        /* One code byte follows; one byte is counted */
        case 0330:
            codes++, length++;
            break;

        case 0331:
            break;

        case 0332:
        case 0333:
            length++;
            break;

        /* Flag consulted in the REX handling after the loop */
        case 0334:
            ins->rex |= REX_L;
            break;

        case 0335:
            break;

        /* Default the REP prefix slot to REP when it is empty */
        case 0336:
            if (!ins->prefixes[PPS_REP])
                ins->prefixes[PPS_REP] = P_REP;
            break;

        /* Default the REP prefix slot to REPNE when it is empty */
        case 0337:
            if (!ins->prefixes[PPS_REP])
                ins->prefixes[PPS_REP] = P_REPNE;
            break;

        /* Space reservation: the size is the constant value of operand 0 */
        case 0340:
            if (ins->oprs[0].segment != NO_SEG)
                errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
                        " quantity of BSS space");
            else
                length += ins->oprs[0].offset;
            break;

        /* Default the WAIT prefix slot to WAIT when it is empty */
        case 0341:
            if (!ins->prefixes[PPS_WAIT])
                ins->prefixes[PPS_WAIT] = P_WAIT;
            break;

        case4(0344):
            length++;
            break;

        case 0360:
            break;

        case 0361:
        case 0362:
        case 0363:
            length++;
            break;

        case 0364:
        case 0365:
            break;

        case 0366:
        case 0367:
            length++;
            break;

        case 0370:
        case 0371:
        case 0372:
            break;

        case 0373:
            length++;
            break;

        /* Change the kind of effective address expected by the EA codes */
        case 0374:
            eat = EA_XMMVSIB;
            break;

        case 0375:
            eat = EA_YMMVSIB;
            break;

        /*
         * Effective-address codes: op2 is the operand holding the EA.  For
         * codes up to 0177 the register field comes from operand op1; for
         * the others it is a constant taken from the low bits of the code.
         */
        case4(0100):
        case4(0110):
        case4(0120):
        case4(0130):
        case4(0200):
        case4(0204):
        case4(0210):
        case4(0214):
        case4(0220):
        case4(0224):
        case4(0230):
        case4(0234):
            {
                ea ea_data;
                int rfield;
                opflags_t rflags;
                struct operand *opy = &ins->oprs[op2];

                ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */

                if (c <= 0177) {
                    /* pick rfield from operand b (opx) */
                    rflags = regflag(opx);
                    rfield = nasm_regvals[opx->basereg];
                } else {
                    rflags = 0;
                    rfield = c & 7;
                }
                /* The EA kind produced must match the kind expected */
                if (process_ea(opy, &ea_data, bits,ins->addr_size,
                               rfield, rflags) != eat) {
                    errfunc(ERR_NONFATAL, "invalid effective address");
                    return -1;
                } else {
                    ins->rex |= ea_data.rex;
                    length += ea_data.size;
                }
            }
            break;

        default:
            errfunc(ERR_PANIC, "internal instruction table corrupt"
                    ": instruction code \\%o (0x%02X) given", c, c);
            break;
        }
    }

    /* Drop any REX bits the template asked to suppress (code 0323) */
    ins->rex &= rex_mask;

    if (ins->rex & REX_NH) {
        if (ins->rex & REX_H) {
            errfunc(ERR_NONFATAL, "instruction cannot use high registers");
            return -1;
        }
        ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
    }

    if (ins->rex & REX_V) {
        /* REX bits that may not be set outside 64-bit mode */
        int bad32 = REX_R|REX_W|REX_X|REX_B;

        if (ins->rex & REX_H) {
            errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
            return -1;
        }
        /* Bits 4..5 of vex_wlp select how REX.W is determined */
        switch (ins->vex_wlp & 060) {
        case 000:
        case 040:
            ins->rex &= ~REX_W;
            break;
        case 020:
            ins->rex |= REX_W;
            bad32 &= ~REX_W;
            break;
        case 060:
            /* Follow REX_W */
            break;
        }

        if (bits != 64 && ((ins->rex & bad32) || ins->vexreg > 7)) {
            errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
            return -1;
        }
        /*
         * Three-byte prefix unless vex_cm is 1 and none of W/X/B is set;
         * gencode() applies the same test when emitting the prefix.
         */
        if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
            length += 3;
        else
            length += 2;
    } else if (ins->rex & REX_REAL) {
        if (ins->rex & REX_H) {
            errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
            return -1;
        } else if (bits == 64) {
            length++;           /* One byte for the REX prefix */
        } else if ((ins->rex & REX_L) &&
                   !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
                   cpu >= IF_X86_64) {
            /* LOCK-as-REX.R */
            assert_no_prefix(ins, PPS_LOCK);
            lockcheck = false;  /* Already errored, no need for warning */
            length++;
        } else {
            errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
            return -1;
        }
    }

    /*
     * A LOCK prefix is only accepted quietly when the template is flagged
     * IF_LOCK and operand 0 is a memory operand.
     */
    if (has_prefix(ins, PPS_LOCK, P_LOCK) && lockcheck &&
        (!(temp->flags & IF_LOCK) || !is_class(MEMORY, ins->oprs[0].type))) {
        errfunc(ERR_WARNING | ERR_WARN_LOCK | ERR_PASS2 ,
                "instruction is not lockable");
    }

    bad_hle_warn(ins, hleok);

    return length;
}
1259
/*
 * Emit the REX prefix byte if one is pending.
 *
 * Expands in a scope that provides `ins', `bits', `offset', `segment' and
 * out().  A byte is written only when the instruction is not VEX-encoded
 * (REX_V clear), at least one REX_REAL bit is set, and bits == 64.  The
 * byte written is the REX_REAL bits of ins->rex combined with REX_P.
 * Afterwards ins->rex is cleared, so a later EMIT_REX() does nothing unless
 * REX bits are set again, and `offset' is advanced by one.
 *
 * The byte is staged in a uint8_t temporary so that exactly that one byte
 * is handed to out(), independent of the width or byte order of ins->rex
 * (passing &ins->rex directly relies on the low byte being stored first
 * if the field is wider than a byte).  The body is wrapped in
 * do { } while (0) so that `EMIT_REX();' is a single statement and remains
 * correct inside an unbraced if/else.
 */
#define EMIT_REX()                                                              \
    do {                                                                        \
        if (!(ins->rex & REX_V) && (ins->rex & REX_REAL) && (bits == 64)) {     \
            uint8_t rex_byte_ = (ins->rex & REX_REAL) | REX_P;                  \
            out(offset, segment, &rex_byte_, OUT_RAWDATA, 1, NO_SEG, NO_SEG);   \
            ins->rex = 0;                                                       \
            offset += 1;                                                        \
        }                                                                       \
    } while (0)
1267
1268 static void gencode(int32_t segment, int64_t offset, int bits,
1269                     insn * ins, const struct itemplate *temp,
1270                     int64_t insn_end)
1271 {
1272     static const char condval[] = {   /* conditional opcodes */
1273         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1274         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1275         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1276     };
1277     uint8_t c;
1278     uint8_t bytes[4];
1279     int64_t size;
1280     int64_t data;
1281     int op1, op2;
1282     struct operand *opx;
1283     const uint8_t *codes = temp->code;
1284     uint8_t opex = 0;
1285     enum ea_type eat = EA_SCALAR;
1286
1287     while (*codes) {
1288         c = *codes++;
1289         op1 = (c & 3) + ((opex & 1) << 2);
1290         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1291         opx = &ins->oprs[op1];
1292         opex = 0;                /* For the next iteration */
1293
1294         switch (c) {
1295         case 01:
1296         case 02:
1297         case 03:
1298         case 04:
1299             EMIT_REX();
1300             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1301             codes += c;
1302             offset += c;
1303             break;
1304
1305         case 05:
1306         case 06:
1307         case 07:
1308             opex = c;
1309             break;
1310
1311         case4(010):
1312             EMIT_REX();
1313             bytes[0] = *codes++ + (regval(opx) & 7);
1314             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1315             offset += 1;
1316             break;
1317
1318         case4(014):
1319             /*
1320              * The test for BITS8 and SBYTE here is intended to avoid
1321              * warning on optimizer actions due to SBYTE, while still
1322              * warn on explicit BYTE directives.  Also warn, obviously,
1323              * if the optimizer isn't enabled.
1324              */
1325             if (((opx->type & BITS8) ||
1326                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1327                 (opx->offset < -128 || opx->offset > 127)) {
1328                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1329                         "signed byte value exceeds bounds");
1330             }
1331             if (opx->segment != NO_SEG) {
1332                 data = opx->offset;
1333                 out(offset, segment, &data, OUT_ADDRESS, 1,
1334                     opx->segment, opx->wrt);
1335             } else {
1336                 bytes[0] = opx->offset;
1337                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1338                     NO_SEG);
1339             }
1340             offset += 1;
1341             break;
1342
1343         case4(020):
1344             if (opx->offset < -256 || opx->offset > 255) {
1345                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1346                         "byte value exceeds bounds");
1347             }
1348             if (opx->segment != NO_SEG) {
1349                 data = opx->offset;
1350                 out(offset, segment, &data, OUT_ADDRESS, 1,
1351                     opx->segment, opx->wrt);
1352             } else {
1353                 bytes[0] = opx->offset;
1354                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1355                     NO_SEG);
1356             }
1357             offset += 1;
1358             break;
1359
1360         case4(024):
1361             if (opx->offset < 0 || opx->offset > 255)
1362                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1363                         "unsigned byte value exceeds bounds");
1364             if (opx->segment != NO_SEG) {
1365                 data = opx->offset;
1366                 out(offset, segment, &data, OUT_ADDRESS, 1,
1367                     opx->segment, opx->wrt);
1368             } else {
1369                 bytes[0] = opx->offset;
1370                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1371                     NO_SEG);
1372             }
1373             offset += 1;
1374             break;
1375
1376         case4(030):
1377             warn_overflow_opd(opx, 2);
1378             data = opx->offset;
1379             out(offset, segment, &data, OUT_ADDRESS, 2,
1380                 opx->segment, opx->wrt);
1381             offset += 2;
1382             break;
1383
1384         case4(034):
1385             if (opx->type & (BITS16 | BITS32))
1386                 size = (opx->type & BITS16) ? 2 : 4;
1387             else
1388                 size = (bits == 16) ? 2 : 4;
1389             warn_overflow_opd(opx, size);
1390             data = opx->offset;
1391             out(offset, segment, &data, OUT_ADDRESS, size,
1392                 opx->segment, opx->wrt);
1393             offset += size;
1394             break;
1395
1396         case4(040):
1397             warn_overflow_opd(opx, 4);
1398             data = opx->offset;
1399             out(offset, segment, &data, OUT_ADDRESS, 4,
1400                 opx->segment, opx->wrt);
1401             offset += 4;
1402             break;
1403
1404         case4(044):
1405             data = opx->offset;
1406             size = ins->addr_size >> 3;
1407             warn_overflow_opd(opx, size);
1408             out(offset, segment, &data, OUT_ADDRESS, size,
1409                 opx->segment, opx->wrt);
1410             offset += size;
1411             break;
1412
1413         case4(050):
1414             if (opx->segment != segment) {
1415                 data = opx->offset;
1416                 out(offset, segment, &data,
1417                     OUT_REL1ADR, insn_end - offset,
1418                     opx->segment, opx->wrt);
1419             } else {
1420                 data = opx->offset - insn_end;
1421                 if (data > 127 || data < -128)
1422                     errfunc(ERR_NONFATAL, "short jump is out of range");
1423                 out(offset, segment, &data,
1424                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1425             }
1426             offset += 1;
1427             break;
1428
1429         case4(054):
1430             data = (int64_t)opx->offset;
1431             out(offset, segment, &data, OUT_ADDRESS, 8,
1432                 opx->segment, opx->wrt);
1433             offset += 8;
1434             break;
1435
1436         case4(060):
1437             if (opx->segment != segment) {
1438                 data = opx->offset;
1439                 out(offset, segment, &data,
1440                     OUT_REL2ADR, insn_end - offset,
1441                     opx->segment, opx->wrt);
1442             } else {
1443                 data = opx->offset - insn_end;
1444                 out(offset, segment, &data,
1445                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1446             }
1447             offset += 2;
1448             break;
1449
1450         case4(064):
1451             if (opx->type & (BITS16 | BITS32 | BITS64))
1452                 size = (opx->type & BITS16) ? 2 : 4;
1453             else
1454                 size = (bits == 16) ? 2 : 4;
1455             if (opx->segment != segment) {
1456                 data = opx->offset;
1457                 out(offset, segment, &data,
1458                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1459                     insn_end - offset, opx->segment, opx->wrt);
1460             } else {
1461                 data = opx->offset - insn_end;
1462                 out(offset, segment, &data,
1463                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1464             }
1465             offset += size;
1466             break;
1467
1468         case4(070):
1469             if (opx->segment != segment) {
1470                 data = opx->offset;
1471                 out(offset, segment, &data,
1472                     OUT_REL4ADR, insn_end - offset,
1473                     opx->segment, opx->wrt);
1474             } else {
1475                 data = opx->offset - insn_end;
1476                 out(offset, segment, &data,
1477                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1478             }
1479             offset += 4;
1480             break;
1481
1482         case4(074):
1483             if (opx->segment == NO_SEG)
1484                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1485                         " relocatable");
1486             data = 0;
1487             out(offset, segment, &data, OUT_ADDRESS, 2,
1488                 outfmt->segbase(1 + opx->segment),
1489                 opx->wrt);
1490             offset += 2;
1491             break;
1492
1493         case4(0140):
1494             data = opx->offset;
1495             warn_overflow_opd(opx, 2);
1496             if (is_sbyte16(opx)) {
1497                 bytes[0] = data;
1498                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1499                     NO_SEG);
1500                 offset++;
1501             } else {
1502                 out(offset, segment, &data, OUT_ADDRESS, 2,
1503                     opx->segment, opx->wrt);
1504                 offset += 2;
1505             }
1506             break;
1507
1508         case4(0144):
1509             EMIT_REX();
1510             bytes[0] = *codes++;
1511             if (is_sbyte16(opx))
1512                 bytes[0] |= 2;  /* s-bit */
1513             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1514             offset++;
1515             break;
1516
1517         case4(0150):
1518             data = opx->offset;
1519             warn_overflow_opd(opx, 4);
1520             if (is_sbyte32(opx)) {
1521                 bytes[0] = data;
1522                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1523                     NO_SEG);
1524                 offset++;
1525             } else {
1526                 out(offset, segment, &data, OUT_ADDRESS, 4,
1527                     opx->segment, opx->wrt);
1528                 offset += 4;
1529             }
1530             break;
1531
1532         case4(0154):
1533             EMIT_REX();
1534             bytes[0] = *codes++;
1535             if (is_sbyte32(opx))
1536                 bytes[0] |= 2;  /* s-bit */
1537             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1538             offset++;
1539             break;
1540
1541         case 0172:
1542             c = *codes++;
1543             opx = &ins->oprs[c >> 3];
1544             bytes[0] = nasm_regvals[opx->basereg] << 4;
1545             opx = &ins->oprs[c & 7];
1546             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1547                 errfunc(ERR_NONFATAL,
1548                         "non-absolute expression not permitted as argument %d",
1549                         c & 7);
1550             } else {
1551                 if (opx->offset & ~15) {
1552                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1553                             "four-bit argument exceeds bounds");
1554                 }
1555                 bytes[0] |= opx->offset & 15;
1556             }
1557             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1558             offset++;
1559             break;
1560
1561         case 0173:
1562             c = *codes++;
1563             opx = &ins->oprs[c >> 4];
1564             bytes[0] = nasm_regvals[opx->basereg] << 4;
1565             bytes[0] |= c & 15;
1566             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1567             offset++;
1568             break;
1569
1570         case4(0174):
1571             bytes[0] = nasm_regvals[opx->basereg] << 4;
1572             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1573             offset++;
1574             break;
1575
1576         case4(0250):
1577             data = opx->offset;
1578             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1579                 (int32_t)data != (int64_t)data) {
1580                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1581                         "signed dword immediate exceeds bounds");
1582             }
1583             if (is_sbyte32(opx)) {
1584                 bytes[0] = data;
1585                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1586                     NO_SEG);
1587                 offset++;
1588             } else {
1589                 out(offset, segment, &data, OUT_ADDRESS, 4,
1590                     opx->segment, opx->wrt);
1591                 offset += 4;
1592             }
1593             break;
1594
1595         case4(0254):
1596             data = opx->offset;
1597             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1598                 (int32_t)data != (int64_t)data) {
1599                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1600                         "signed dword immediate exceeds bounds");
1601             }
1602             out(offset, segment, &data, OUT_ADDRESS, 4,
1603                 opx->segment, opx->wrt);
1604             offset += 4;
1605             break;
1606
1607         case4(0260):
1608         case 0270:
1609             codes += 2;
1610             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1611                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1612                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1613                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1614                     ((~ins->vexreg & 15)<< 3) | (ins->vex_wlp & 07);
1615                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1616                 offset += 3;
1617             } else {
1618                 bytes[0] = 0xc5;
1619                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1620                     ((~ins->vexreg & 15) << 3) | (ins->vex_wlp & 07);
1621                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1622                 offset += 2;
1623             }
1624             break;
1625
1626         case 0271:
1627         case 0272:
1628         case 0273:
1629             break;
1630
1631         case4(0274):
1632         {
1633             uint64_t uv, um;
1634             int s;
1635
1636             if (ins->rex & REX_W)
1637                 s = 64;
1638             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1639                 s = 16;
1640             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1641                 s = 32;
1642             else
1643                 s = bits;
1644
1645             um = (uint64_t)2 << (s-1);
1646             uv = opx->offset;
1647
1648             if (uv > 127 && uv < (uint64_t)-128 &&
1649                 (uv < um-128 || uv > um-1)) {
1650                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1651                         "signed byte value exceeds bounds");
1652             }
1653             if (opx->segment != NO_SEG) {
1654                 data = uv;
1655                 out(offset, segment, &data, OUT_ADDRESS, 1,
1656                     opx->segment, opx->wrt);
1657             } else {
1658                 bytes[0] = uv;
1659                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1660                     NO_SEG);
1661             }
1662             offset += 1;
1663             break;
1664         }
1665
1666         case4(0300):
1667             break;
1668
1669         case 0310:
1670             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1671                 *bytes = 0x67;
1672                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1673                 offset += 1;
1674             } else
1675                 offset += 0;
1676             break;
1677
1678         case 0311:
1679             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1680                 *bytes = 0x67;
1681                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1682                 offset += 1;
1683             } else
1684                 offset += 0;
1685             break;
1686
1687         case 0312:
1688             break;
1689
1690         case 0313:
1691             ins->rex = 0;
1692             break;
1693
1694         case4(0314):
1695             break;
1696
1697         case 0320:
1698         case 0321:
1699             break;
1700
1701         case 0322:
1702         case 0323:
1703             break;
1704
1705         case 0324:
1706             ins->rex |= REX_W;
1707             break;
1708
1709         case 0325:
1710             break;
1711
1712         case 0330:
1713             *bytes = *codes++ ^ condval[ins->condition];
1714             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1715             offset += 1;
1716             break;
1717
1718         case 0331:
1719             break;
1720
1721         case 0332:
1722         case 0333:
1723             *bytes = c - 0332 + 0xF2;
1724             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1725             offset += 1;
1726             break;
1727
1728         case 0334:
1729             if (ins->rex & REX_R) {
1730                 *bytes = 0xF0;
1731                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1732                 offset += 1;
1733             }
1734             ins->rex &= ~(REX_L|REX_R);
1735             break;
1736
1737         case 0335:
1738             break;
1739
1740         case 0336:
1741         case 0337:
1742             break;
1743
1744         case 0340:
1745             if (ins->oprs[0].segment != NO_SEG)
1746                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1747             else {
1748                 int64_t size = ins->oprs[0].offset;
1749                 if (size > 0)
1750                     out(offset, segment, NULL,
1751                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1752                 offset += size;
1753             }
1754             break;
1755
1756         case 0341:
1757             break;
1758
1759         case 0344:
1760         case 0345:
1761             bytes[0] = c & 1;
1762             switch (ins->oprs[0].basereg) {
1763             case R_CS:
1764                 bytes[0] += 0x0E;
1765                 break;
1766             case R_DS:
1767                 bytes[0] += 0x1E;
1768                 break;
1769             case R_ES:
1770                 bytes[0] += 0x06;
1771                 break;
1772             case R_SS:
1773                 bytes[0] += 0x16;
1774                 break;
1775             default:
1776                 errfunc(ERR_PANIC,
1777                         "bizarre 8086 segment register received");
1778             }
1779             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1780             offset++;
1781             break;
1782
1783         case 0346:
1784         case 0347:
1785             bytes[0] = c & 1;
1786             switch (ins->oprs[0].basereg) {
1787             case R_FS:
1788                 bytes[0] += 0xA0;
1789                 break;
1790             case R_GS:
1791                 bytes[0] += 0xA8;
1792                 break;
1793             default:
1794                 errfunc(ERR_PANIC,
1795                         "bizarre 386 segment register received");
1796             }
1797             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1798             offset++;
1799             break;
1800
1801         case 0360:
1802             break;
1803
1804         case 0361:
1805             bytes[0] = 0x66;
1806             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1807             offset += 1;
1808             break;
1809
1810         case 0362:
1811         case 0363:
1812             bytes[0] = c - 0362 + 0xf2;
1813             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1814             offset += 1;
1815             break;
1816
1817         case 0364:
1818         case 0365:
1819             break;
1820
1821         case 0366:
1822         case 0367:
1823             *bytes = c - 0366 + 0x66;
1824             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1825             offset += 1;
1826             break;
1827
1828         case 0370:
1829         case 0371:
1830             break;
1831
1832         case 0373:
1833             *bytes = bits == 16 ? 3 : 5;
1834             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1835             offset += 1;
1836             break;
1837
1838         case 0374:
1839             eat = EA_XMMVSIB;
1840             break;
1841
1842         case 0375:
1843             eat = EA_YMMVSIB;
1844             break;
1845
1846         case4(0100):
1847         case4(0110):
1848         case4(0120):
1849         case4(0130):
1850         case4(0200):
1851         case4(0204):
1852         case4(0210):
1853         case4(0214):
1854         case4(0220):
1855         case4(0224):
1856         case4(0230):
1857         case4(0234):
1858             {
1859                 ea ea_data;
1860                 int rfield;
1861                 opflags_t rflags;
1862                 uint8_t *p;
1863                 int32_t s;
1864                 struct operand *opy = &ins->oprs[op2];
1865
1866                 if (c <= 0177) {
1867                     /* pick rfield from operand b (opx) */
1868                     rflags = regflag(opx);
1869                     rfield = nasm_regvals[opx->basereg];
1870                 } else {
1871                     /* rfield is constant */
1872                     rflags = 0;
1873                     rfield = c & 7;
1874                 }
1875
1876                 if (process_ea(opy, &ea_data, bits, ins->addr_size,
1877                                rfield, rflags) != eat)
1878                     errfunc(ERR_NONFATAL, "invalid effective address");
1879
1880                 p = bytes;
1881                 *p++ = ea_data.modrm;
1882                 if (ea_data.sib_present)
1883                     *p++ = ea_data.sib;
1884
1885                 s = p - bytes;
1886                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1887
1888                 /*
1889                  * Make sure the address gets the right offset in case
1890                  * the line breaks in the .lst file (BR 1197827)
1891                  */
1892                 offset += s;
1893                 s = 0;
1894
1895                 switch (ea_data.bytes) {
1896                 case 0:
1897                     break;
1898                 case 1:
1899                 case 2:
1900                 case 4:
1901                 case 8:
1902                     data = opy->offset;
1903                     s += ea_data.bytes;
1904                     if (ea_data.rip) {
1905                         if (opy->segment == segment) {
1906                             data -= insn_end;
1907                             if (overflow_signed(data, ea_data.bytes))
1908                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1909                             out(offset, segment, &data, OUT_ADDRESS,
1910                                 ea_data.bytes, NO_SEG, NO_SEG);
1911                         } else {
1912                             /* overflow check in output/linker? */
1913                             out(offset, segment, &data,        OUT_REL4ADR,
1914                                 insn_end - offset, opy->segment, opy->wrt);
1915                         }
1916                     } else {
1917                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1918                             signed_bits(opy->offset, ins->addr_size) !=
1919                             signed_bits(opy->offset, ea_data.bytes * 8))
1920                             warn_overflow(ERR_PASS2, ea_data.bytes);
1921
1922                         out(offset, segment, &data, OUT_ADDRESS,
1923                             ea_data.bytes, opy->segment, opy->wrt);
1924                     }
1925                     break;
1926                 default:
1927                     /* Impossible! */
1928                     errfunc(ERR_PANIC,
1929                             "Invalid amount of bytes (%d) for offset?!",
1930                             ea_data.bytes);
1931                     break;
1932                 }
1933                 offset += s;
1934             }
1935             break;
1936
1937         default:
1938             errfunc(ERR_PANIC, "internal instruction table corrupt"
1939                     ": instruction code \\%o (0x%02X) given", c, c);
1940             break;
1941         }
1942     }
1943 }
1944
1945 static opflags_t regflag(const operand * o)
1946 {
1947     if (!is_register(o->basereg))
1948         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1949     return nasm_reg_flags[o->basereg];
1950 }
1951
1952 static int32_t regval(const operand * o)
1953 {
1954     if (!is_register(o->basereg))
1955         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1956     return nasm_regvals[o->basereg];
1957 }
1958
1959 static int op_rexflags(const operand * o, int mask)
1960 {
1961     opflags_t flags;
1962     int val;
1963
1964     if (!is_register(o->basereg))
1965         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1966
1967     flags = nasm_reg_flags[o->basereg];
1968     val = nasm_regvals[o->basereg];
1969
1970     return rexflags(val, flags, mask);
1971 }
1972
1973 static int rexflags(int val, opflags_t flags, int mask)
1974 {
1975     int rex = 0;
1976
1977     if (val >= 8)
1978         rex |= REX_B|REX_X|REX_R;
1979     if (flags & BITS64)
1980         rex |= REX_W;
1981     if (!(REG_HIGH & ~flags))                   /* AH, CH, DH, BH */
1982         rex |= REX_H;
1983     else if (!(REG8 & ~flags) && val >= 4)      /* SPL, BPL, SIL, DIL */
1984         rex |= REX_P;
1985
1986     return rex & mask;
1987 }
1988
1989 static enum match_result find_match(const struct itemplate **tempp,
1990                                     insn *instruction,
1991                                     int32_t segment, int64_t offset, int bits)
1992 {
1993     const struct itemplate *temp;
1994     enum match_result m, merr;
1995     opflags_t xsizeflags[MAX_OPERANDS];
1996     bool opsizemissing = false;
1997     int i;
1998
1999     for (i = 0; i < instruction->operands; i++)
2000         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
2001
2002     merr = MERR_INVALOP;
2003
2004     for (temp = nasm_instructions[instruction->opcode];
2005          temp->opcode != I_none; temp++) {
2006         m = matches(temp, instruction, bits);
2007         if (m == MOK_JUMP) {
2008             if (jmp_match(segment, offset, bits, instruction, temp))
2009                 m = MOK_GOOD;
2010             else
2011                 m = MERR_INVALOP;
2012         } else if (m == MERR_OPSIZEMISSING &&
2013                    (temp->flags & IF_SMASK) != IF_SX) {
2014             /*
2015              * Missing operand size and a candidate for fuzzy matching...
2016              */
2017             for (i = 0; i < temp->operands; i++) {
2018                 if ((temp->opd[i] & SAME_AS) == 0)
2019                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
2020             }
2021             opsizemissing = true;
2022         }
2023         if (m > merr)
2024             merr = m;
2025         if (merr == MOK_GOOD)
2026             goto done;
2027     }
2028
2029     /* No match, but see if we can get a fuzzy operand size match... */
2030     if (!opsizemissing)
2031         goto done;
2032
2033     for (i = 0; i < instruction->operands; i++) {
2034         /*
2035          * We ignore extrinsic operand sizes on registers, so we should
2036          * never try to fuzzy-match on them.  This also resolves the case
2037          * when we have e.g. "xmmrm128" in two different positions.
2038          */
2039         if (is_class(REGISTER, instruction->oprs[i].type))
2040             continue;
2041
2042         /* This tests if xsizeflags[i] has more than one bit set */
2043         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2044             goto done;                /* No luck */
2045
2046         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2047     }
2048
2049     /* Try matching again... */
2050     for (temp = nasm_instructions[instruction->opcode];
2051          temp->opcode != I_none; temp++) {
2052         m = matches(temp, instruction, bits);
2053         if (m == MOK_JUMP) {
2054             if (jmp_match(segment, offset, bits, instruction, temp))
2055                 m = MOK_GOOD;
2056             else
2057                 m = MERR_INVALOP;
2058         }
2059         if (m > merr)
2060             merr = m;
2061         if (merr == MOK_GOOD)
2062             goto done;
2063     }
2064
2065 done:
2066     *tempp = temp;
2067     return merr;
2068 }
2069
2070 static enum match_result matches(const struct itemplate *itemp,
2071                                  insn *instruction, int bits)
2072 {
2073     int i, size[MAX_OPERANDS], asize, oprs;
2074     bool opsizemissing = false;
2075
2076     /*
2077      * Check the opcode
2078      */
2079     if (itemp->opcode != instruction->opcode)
2080         return MERR_INVALOP;
2081
2082     /*
2083      * Count the operands
2084      */
2085     if (itemp->operands != instruction->operands)
2086         return MERR_INVALOP;
2087
2088     /*
2089      * Is it legal?
2090      */
2091     if (!(optimizing > 0) && (itemp->flags & IF_OPT))
2092         return MERR_INVALOP;
2093
2094     /*
2095      * Check that no spurious colons or TOs are present
2096      */
2097     for (i = 0; i < itemp->operands; i++)
2098         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2099             return MERR_INVALOP;
2100
2101     /*
2102      * Process size flags
2103      */
2104     switch (itemp->flags & IF_SMASK) {
2105     case IF_SB:
2106         asize = BITS8;
2107         break;
2108     case IF_SW:
2109         asize = BITS16;
2110         break;
2111     case IF_SD:
2112         asize = BITS32;
2113         break;
2114     case IF_SQ:
2115         asize = BITS64;
2116         break;
2117     case IF_SO:
2118         asize = BITS128;
2119         break;
2120     case IF_SY:
2121         asize = BITS256;
2122         break;
2123     case IF_SZ:
2124         switch (bits) {
2125         case 16:
2126             asize = BITS16;
2127             break;
2128         case 32:
2129             asize = BITS32;
2130             break;
2131         case 64:
2132             asize = BITS64;
2133             break;
2134         default:
2135             asize = 0;
2136             break;
2137         }
2138         break;
2139     default:
2140         asize = 0;
2141         break;
2142     }
2143
2144     if (itemp->flags & IF_ARMASK) {
2145         /* S- flags only apply to a specific operand */
2146         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2147         memset(size, 0, sizeof size);
2148         size[i] = asize;
2149     } else {
2150         /* S- flags apply to all operands */
2151         for (i = 0; i < MAX_OPERANDS; i++)
2152             size[i] = asize;
2153     }
2154
2155     /*
2156      * Check that the operand flags all match up,
2157      * it's a bit tricky so lets be verbose:
2158      *
2159      * 1) Find out the size of operand. If instruction
2160      *    doesn't have one specified -- we're trying to
2161      *    guess it either from template (IF_S* flag) or
2162      *    from code bits.
2163      *
2164      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2165      *    (ie the same operand as was specified somewhere in template, and
2166      *    this referred operand index is being achieved via ~SAME_AS)
2167      *    we are to be sure that both registers (in template and instruction)
2168      *    do exactly match.
2169      *
2170      * 3) If template operand do not match the instruction OR
2171      *    template has an operand size specified AND this size differ
2172      *    from which instruction has (perhaps we got it from code bits)
2173      *    we are:
2174      *      a)  Check that only size of instruction and operand is differ
2175      *          other characteristics do match
2176      *      b)  Perhaps it's a register specified in instruction so
2177      *          for such a case we just mark that operand as "size
2178      *          missing" and this will turn on fuzzy operand size
2179      *          logic facility (handled by a caller)
2180      */
2181     for (i = 0; i < itemp->operands; i++) {
2182         opflags_t type = instruction->oprs[i].type;
2183         if (!(type & SIZE_MASK))
2184             type |= size[i];
2185
2186         if (itemp->opd[i] & SAME_AS) {
2187             int j = itemp->opd[i] & ~SAME_AS;
2188             if (type != instruction->oprs[j].type ||
2189                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2190                 return MERR_INVALOP;
2191         } else if (itemp->opd[i] & ~type ||
2192             ((itemp->opd[i] & SIZE_MASK) &&
2193              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2194             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2195                 return MERR_INVALOP;
2196             } else if (!is_class(REGISTER, type)) {
2197                 /*
2198                  * Note: we don't honor extrinsic operand sizes for registers,
2199                  * so "missing operand size" for a register should be
2200                  * considered a wildcard match rather than an error.
2201                  */
2202                 opsizemissing = true;
2203             }
2204         }
2205     }
2206
2207     if (opsizemissing)
2208         return MERR_OPSIZEMISSING;
2209
2210     /*
2211      * Check operand sizes
2212      */
2213     if (itemp->flags & (IF_SM | IF_SM2)) {
2214         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2215         for (i = 0; i < oprs; i++) {
2216             asize = itemp->opd[i] & SIZE_MASK;
2217             if (asize) {
2218                 for (i = 0; i < oprs; i++)
2219                     size[i] = asize;
2220                 break;
2221             }
2222         }
2223     } else {
2224         oprs = itemp->operands;
2225     }
2226
2227     for (i = 0; i < itemp->operands; i++) {
2228         if (!(itemp->opd[i] & SIZE_MASK) &&
2229             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2230             return MERR_OPSIZEMISMATCH;
2231     }
2232
2233     /*
2234      * Check template is okay at the set cpu level
2235      */
2236     if (((itemp->flags & IF_PLEVEL) > cpu))
2237         return MERR_BADCPU;
2238
2239     /*
2240      * Verify the appropriate long mode flag.
2241      */
2242     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2243         return MERR_BADMODE;
2244
2245     /*
2246      * If we have a HLE prefix, look for the NOHLE flag
2247      */
2248     if ((itemp->flags & IF_NOHLE) &&
2249         (has_prefix(instruction, PPS_REP, P_XACQUIRE) ||
2250          has_prefix(instruction, PPS_REP, P_XRELEASE)))
2251         return MERR_BADHLE;
2252
2253     /*
2254      * Check if special handling needed for Jumps
2255      */
2256     if ((itemp->code[0] & ~1) == 0370)
2257         return MOK_JUMP;
2258
2259     return MOK_GOOD;
2260 }
2261
2262 static enum ea_type process_ea(operand *input, ea *output, int bits,
2263                                int addrbits, int rfield, opflags_t rflags)
2264 {
2265     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2266
2267     output->type    = EA_SCALAR;
2268     output->rip     = false;
2269
2270     /* REX flags for the rfield operand */
2271     output->rex     |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
2272
2273     if (is_class(REGISTER, input->type)) {
2274         /*
2275          * It's a direct register.
2276          */
2277         opflags_t f;
2278
2279         if (!is_register(input->basereg))
2280             goto err;
2281
2282         f = regflag(input);
2283
2284         if (!is_class(REG_EA, f))
2285             goto err;
2286
2287         output->rex         |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
2288         output->sib_present = false;    /* no SIB necessary */
2289         output->bytes       = 0;        /* no offset necessary either */
2290         output->modrm       = GEN_MODRM(3, rfield, nasm_regvals[input->basereg]);
2291     } else {
2292         /*
2293          * It's a memory reference.
2294          */
2295         if (input->basereg == -1 &&
2296             (input->indexreg == -1 || input->scale == 0)) {
2297             /*
2298              * It's a pure offset.
2299              */
2300             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2301                 input->segment == NO_SEG) {
2302                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2303                 input->type &= ~IP_REL;
2304                 input->type |= MEMORY;
2305             }
2306
2307             if (input->eaflags & EAF_BYTEOFFS ||
2308                 (input->eaflags & EAF_WORDOFFS &&
2309                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2310                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2311             }
2312
2313             if (bits == 64 && (~input->type & IP_REL)) {
2314                 output->sib_present = true;
2315                 output->sib         = GEN_SIB(0, 4, 5);
2316                 output->bytes       = 4;
2317                 output->modrm       = GEN_MODRM(0, rfield, 4);
2318                 output->rip         = false;
2319             } else {
2320                 output->sib_present = false;
2321                 output->bytes       = (addrbits != 16 ? 4 : 2);
2322                 output->modrm       = GEN_MODRM(0, rfield, (addrbits != 16 ? 5 : 6));
2323                 output->rip         = bits == 64;
2324             }
2325         } else {
2326             /*
2327              * It's an indirection.
2328              */
2329             int i = input->indexreg, b = input->basereg, s = input->scale;
2330             int32_t seg = input->segment;
2331             int hb = input->hintbase, ht = input->hinttype;
2332             int t, it, bt;              /* register numbers */
2333             opflags_t x, ix, bx;        /* register flags */
2334
2335             if (s == 0)
2336                 i = -1;         /* make this easy, at least */
2337
2338             if (is_register(i)) {
2339                 it = nasm_regvals[i];
2340                 ix = nasm_reg_flags[i];
2341             } else {
2342                 it = -1;
2343                 ix = 0;
2344             }
2345
2346             if (is_register(b)) {
2347                 bt = nasm_regvals[b];
2348                 bx = nasm_reg_flags[b];
2349             } else {
2350                 bt = -1;
2351                 bx = 0;
2352             }
2353
2354             /* if either one are a vector register... */
2355             if ((ix|bx) & (XMMREG|YMMREG) & ~REG_EA) {
2356                 int32_t sok = BITS32 | BITS64;
2357                 int32_t o = input->offset;
2358                 int mod, scale, index, base;
2359
2360                 /*
2361                  * For a vector SIB, one has to be a vector and the other,
2362                  * if present, a GPR.  The vector must be the index operand.
2363                  */
2364                 if (it == -1 || (bx & (XMMREG|YMMREG) & ~REG_EA)) {
2365                     if (s == 0)
2366                         s = 1;
2367                     else if (s != 1)
2368                         goto err;
2369
2370                     t = bt, bt = it, it = t;
2371                     x = bx, bx = ix, ix = x;
2372                 }
2373
2374                 if (bt != -1) {
2375                     if (REG_GPR & ~bx)
2376                         goto err;
2377                     if (!(REG64 & ~bx) || !(REG32 & ~bx))
2378                         sok &= bx;
2379                     else
2380                         goto err;
2381                 }
2382
2383                 /*
2384                  * While we're here, ensure the user didn't specify
2385                  * WORD or QWORD
2386                  */
2387                 if (input->disp_size == 16 || input->disp_size == 64)
2388                     goto err;
2389
2390                 if (addrbits == 16 ||
2391                     (addrbits == 32 && !(sok & BITS32)) ||
2392                     (addrbits == 64 && !(sok & BITS64)))
2393                     goto err;
2394
2395                 output->type = (ix & YMMREG & ~REG_EA)
2396                     ? EA_YMMVSIB : EA_XMMVSIB;
2397
2398                 output->rex |= rexflags(it, ix, REX_X);
2399                 output->rex |= rexflags(bt, bx, REX_B);
2400
2401                 index = it & 7; /* it is known to be != -1 */
2402
2403                 switch (s) {
2404                 case 1:
2405                     scale = 0;
2406                     break;
2407                 case 2:
2408                     scale = 1;
2409                     break;
2410                 case 4:
2411                     scale = 2;
2412                     break;
2413                 case 8:
2414                     scale = 3;
2415                     break;
2416                 default:   /* then what the smeg is it? */
2417                     goto err;    /* panic */
2418                 }
2419
2420                 if (bt == -1) {
2421                     base = 5;
2422                     mod = 0;
2423                 } else {
2424                     base = (bt & 7);
2425                     if (base != REG_NUM_EBP && o == 0 &&
2426                         seg == NO_SEG && !forw_ref &&
2427                         !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2428                         mod = 0;
2429                     else if (input->eaflags & EAF_BYTEOFFS ||
2430                              (o >= -128 && o <= 127 &&
2431                               seg == NO_SEG && !forw_ref &&
2432                               !(input->eaflags & EAF_WORDOFFS)))
2433                         mod = 1;
2434                     else
2435                         mod = 2;
2436                 }
2437
2438                 output->sib_present = true;
2439                 output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2440                 output->modrm       = GEN_MODRM(mod, rfield, 4);
2441                 output->sib         = GEN_SIB(scale, index, base);
2442             } else if ((ix|bx) & (BITS32|BITS64)) {
2443                 /*
2444                  * it must be a 32/64-bit memory reference. Firstly we have
2445                  * to check that all registers involved are type E/Rxx.
2446                  */
2447                 int32_t sok = BITS32 | BITS64;
2448                 int32_t o = input->offset;
2449
2450                 if (it != -1) {
2451                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2452                         sok &= ix;
2453                     else
2454                         goto err;
2455                 }
2456
2457                 if (bt != -1) {
2458                     if (REG_GPR & ~bx)
2459                         goto err; /* Invalid register */
2460                     if (~sok & bx & SIZE_MASK)
2461                         goto err; /* Invalid size */
2462                     sok &= bx;
2463                 }
2464
2465                 /*
2466                  * While we're here, ensure the user didn't specify
2467                  * WORD or QWORD
2468                  */
2469                 if (input->disp_size == 16 || input->disp_size == 64)
2470                     goto err;
2471
2472                 if (addrbits == 16 ||
2473                     (addrbits == 32 && !(sok & BITS32)) ||
2474                     (addrbits == 64 && !(sok & BITS64)))
2475                     goto err;
2476
2477                 /* now reorganize base/index */
2478                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2479                     ((hb == b && ht == EAH_NOTBASE) ||
2480                      (hb == i && ht == EAH_MAKEBASE))) {
2481                     /* swap if hints say so */
2482                     t = bt, bt = it, it = t;
2483                     x = bx, bx = ix, ix = x;
2484                 }
2485                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2486                     bt = -1, bx = 0, s++;
2487                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2488                     /* make single reg base, unless hint */
2489                     bt = it, bx = ix, it = -1, ix = 0;
2490                 }
2491                 if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
2492                       s == 3 || s == 5 || s == 9) && bt == -1)
2493                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2494                 if (it == -1 && (bt & 7) != REG_NUM_ESP &&
2495                     (input->eaflags & EAF_TIMESTWO))
2496                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2497                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2498                 if (s == 1 && it == REG_NUM_ESP) {
2499                     /* swap ESP into base if scale is 1 */
2500                     t = it, it = bt, bt = t;
2501                     x = ix, ix = bx, bx = x;
2502                 }
2503                 if (it == REG_NUM_ESP ||
2504                     (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2505                     goto err;        /* wrong, for various reasons */
2506
2507                 output->rex |= rexflags(it, ix, REX_X);
2508                 output->rex |= rexflags(bt, bx, REX_B);
2509
2510                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2511                     /* no SIB needed */
2512                     int mod, rm;
2513
2514                     if (bt == -1) {
2515                         rm = 5;
2516                         mod = 0;
2517                     } else {
2518                         rm = (bt & 7);
2519                         if (rm != REG_NUM_EBP && o == 0 &&
2520                             seg == NO_SEG && !forw_ref &&
2521                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2522                             mod = 0;
2523                         else if (input->eaflags & EAF_BYTEOFFS ||
2524                                  (o >= -128 && o <= 127 &&
2525                                   seg == NO_SEG && !forw_ref &&
2526                                   !(input->eaflags & EAF_WORDOFFS)))
2527                             mod = 1;
2528                         else
2529                             mod = 2;
2530                     }
2531
2532                     output->sib_present = false;
2533                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2534                     output->modrm       = GEN_MODRM(mod, rfield, rm);
2535                 } else {
2536                     /* we need a SIB */
2537                     int mod, scale, index, base;
2538
2539                     if (it == -1)
2540                         index = 4, s = 1;
2541                     else
2542                         index = (it & 7);
2543
2544                     switch (s) {
2545                     case 1:
2546                         scale = 0;
2547                         break;
2548                     case 2:
2549                         scale = 1;
2550                         break;
2551                     case 4:
2552                         scale = 2;
2553                         break;
2554                     case 8:
2555                         scale = 3;
2556                         break;
2557                     default:   /* then what the smeg is it? */
2558                         goto err;    /* panic */
2559                     }
2560
2561                     if (bt == -1) {
2562                         base = 5;
2563                         mod = 0;
2564                     } else {
2565                         base = (bt & 7);
2566                         if (base != REG_NUM_EBP && o == 0 &&
2567                             seg == NO_SEG && !forw_ref &&
2568                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2569                             mod = 0;
2570                         else if (input->eaflags & EAF_BYTEOFFS ||
2571                                  (o >= -128 && o <= 127 &&
2572                                   seg == NO_SEG && !forw_ref &&
2573                                   !(input->eaflags & EAF_WORDOFFS)))
2574                             mod = 1;
2575                         else
2576                             mod = 2;
2577                     }
2578
2579                     output->sib_present = true;
2580                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2581                     output->modrm       = GEN_MODRM(mod, rfield, 4);
2582                     output->sib         = GEN_SIB(scale, index, base);
2583                 }
2584             } else {            /* it's 16-bit */
2585                 int mod, rm;
2586                 int16_t o = input->offset;
2587
2588                 /* check for 64-bit long mode */
2589                 if (addrbits == 64)
2590                     goto err;
2591
2592                 /* check all registers are BX, BP, SI or DI */
2593                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
2594                     (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
2595                     goto err;
2596
2597                 /* ensure the user didn't specify DWORD/QWORD */
2598                 if (input->disp_size == 32 || input->disp_size == 64)
2599                     goto err;
2600
2601                 if (s != 1 && i != -1)
2602                     goto err;        /* no can do, in 16-bit EA */
2603                 if (b == -1 && i != -1) {
2604                     int tmp = b;
2605                     b = i;
2606                     i = tmp;
2607                 }               /* swap */
2608                 if ((b == R_SI || b == R_DI) && i != -1) {
2609                     int tmp = b;
2610                     b = i;
2611                     i = tmp;
2612                 }
2613                 /* have BX/BP as base, SI/DI index */
2614                 if (b == i)
2615                     goto err;        /* shouldn't ever happen, in theory */
2616                 if (i != -1 && b != -1 &&
2617                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2618                     goto err;        /* invalid combinations */
2619                 if (b == -1)            /* pure offset: handled above */
2620                     goto err;        /* so if it gets to here, panic! */
2621
2622                 rm = -1;
2623                 if (i != -1)
2624                     switch (i * 256 + b) {
2625                     case R_SI * 256 + R_BX:
2626                         rm = 0;
2627                         break;
2628                     case R_DI * 256 + R_BX:
2629                         rm = 1;
2630                         break;
2631                     case R_SI * 256 + R_BP:
2632                         rm = 2;
2633                         break;
2634                     case R_DI * 256 + R_BP:
2635                         rm = 3;
2636                         break;
2637                 } else
2638                     switch (b) {
2639                     case R_SI:
2640                         rm = 4;
2641                         break;
2642                     case R_DI:
2643                         rm = 5;
2644                         break;
2645                     case R_BP:
2646                         rm = 6;
2647                         break;
2648                     case R_BX:
2649                         rm = 7;
2650                         break;
2651                     }
2652                 if (rm == -1)           /* can't happen, in theory */
2653                     goto err;        /* so panic if it does */
2654
2655                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2656                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2657                     mod = 0;
2658                 else if (input->eaflags & EAF_BYTEOFFS ||
2659                          (o >= -128 && o <= 127 && seg == NO_SEG &&
2660                           !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
2661                     mod = 1;
2662                 else
2663                     mod = 2;
2664
2665                 output->sib_present = false;    /* no SIB - it's 16-bit */
2666                 output->bytes       = mod;      /* bytes of offset needed */
2667                 output->modrm       = GEN_MODRM(mod, rfield, rm);
2668             }
2669         }
2670     }
2671
2672     output->size = 1 + output->sib_present + output->bytes;
2673     return output->type;
2674
2675 err:
2676     return output->type = EA_INVALID;
2677 }
2678
2679 static void add_asp(insn *ins, int addrbits)
2680 {
2681     int j, valid;
2682     int defdisp;
2683
2684     valid = (addrbits == 64) ? 64|32 : 32|16;
2685
2686     switch (ins->prefixes[PPS_ASIZE]) {
2687     case P_A16:
2688         valid &= 16;
2689         break;
2690     case P_A32:
2691         valid &= 32;
2692         break;
2693     case P_A64:
2694         valid &= 64;
2695         break;
2696     case P_ASP:
2697         valid &= (addrbits == 32) ? 16 : 32;
2698         break;
2699     default:
2700         break;
2701     }
2702
2703     for (j = 0; j < ins->operands; j++) {
2704         if (is_class(MEMORY, ins->oprs[j].type)) {
2705             opflags_t i, b;
2706
2707             /* Verify as Register */
2708             if (!is_register(ins->oprs[j].indexreg))
2709                 i = 0;
2710             else
2711                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2712
2713             /* Verify as Register */
2714             if (!is_register(ins->oprs[j].basereg))
2715                 b = 0;
2716             else
2717                 b = nasm_reg_flags[ins->oprs[j].basereg];
2718
2719             if (ins->oprs[j].scale == 0)
2720                 i = 0;
2721
2722             if (!i && !b) {
2723                 int ds = ins->oprs[j].disp_size;
2724                 if ((addrbits != 64 && ds > 8) ||
2725                     (addrbits == 64 && ds == 16))
2726                     valid &= ds;
2727             } else {
2728                 if (!(REG16 & ~b))
2729                     valid &= 16;
2730                 if (!(REG32 & ~b))
2731                     valid &= 32;
2732                 if (!(REG64 & ~b))
2733                     valid &= 64;
2734
2735                 if (!(REG16 & ~i))
2736                     valid &= 16;
2737                 if (!(REG32 & ~i))
2738                     valid &= 32;
2739                 if (!(REG64 & ~i))
2740                     valid &= 64;
2741             }
2742         }
2743     }
2744
2745     if (valid & addrbits) {
2746         ins->addr_size = addrbits;
2747     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2748         /* Add an address size prefix */
2749         ins->prefixes[PPS_ASIZE] = (addrbits == 32) ? P_A16 : P_A32;;
2750         ins->addr_size = (addrbits == 32) ? 16 : 32;
2751     } else {
2752         /* Impossible... */
2753         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2754         ins->addr_size = addrbits; /* Error recovery */
2755     }
2756
2757     defdisp = ins->addr_size == 16 ? 16 : 32;
2758
2759     for (j = 0; j < ins->operands; j++) {
2760         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2761             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
2762             /*
2763              * mem_offs sizes must match the address size; if not,
2764              * strip the MEM_OFFS bit and match only EA instructions
2765              */
2766             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2767         }
2768     }
2769 }