assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2012 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \172\ab       - the register number from operand a in bits 7..4, with
  70  *                 the 4-bit immediate from operand b in bits 3..0.
  71  * \173\xab      - the register number from operand a in bits 7..4, with
  72  *                 the value b in bits 3..0.
  73  * \174..\177    - the register number from operand 0..3 in bits 7..4, and
  74  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  75  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  76  *                 field equal to digit b.
  77  * \240          - skip this instruction pattern if HLE prefixes present
  78  * \241          - instruction takes XRELEASE (F3) with or without lock
  79  * \242          - instruction takes XACQUIRE/XRELEASE with or without lock
  80  * \243          - instruction takes XACQUIRE/XRELEASE with lock only
  81  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  82  *                 is not equal to the truncated and sign-extended 32-bit
  83  *                 operand; used for 32-bit immediates in 64-bit mode.
  84  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  85  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  86  *                 V field taken from operand 0..3.
  87  * \270          - this instruction uses VEX/XOP rather than REX, with the
  88  *                 V field set to 1111b.
  89  *
  90  * VEX/XOP prefixes are followed by the sequence:
  91  * \tmm\wlp        where mm is the M field; and wlp is:
  92  *                 00 wwl lpp
  93  *                 [l0]  ll = 0 for L = 0 (.128, .lz)
  94  *                 [l1]  ll = 1 for L = 1 (.256)
  95  *                 [lig] ll = 2 for L don't care (always assembled as 0)
  96  *
  97  *                 [w0]  ww = 0 for W = 0
   98  *                 [w1]  ww = 1 for W = 1
  99  *                 [wig] ww = 2 for W don't care (always assembled as 0)
 100  *                 [ww]  ww = 3 for W used as REX.W
 101  *
 102  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 103  *
 104  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 105  *                 which is to be extended to the operand size.
 106  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 107  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 108  * \312          - (disassembler only) invalid with non-default address size.
 109  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 110  * \314          - (disassembler only) invalid with REX.B
 111  * \315          - (disassembler only) invalid with REX.X
 112  * \316          - (disassembler only) invalid with REX.R
 113  * \317          - (disassembler only) invalid with REX.W
 114  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 115  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 116  * \322          - indicates that this instruction is only valid when the
 117  *                 operand size is the default (instruction to disassembler,
 118  *                 generates no code in the assembler)
 119  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 120  * \324          - indicates 64-bit operand size requiring REX prefix.
 121  * \325          - instruction which always uses spl/bpl/sil/dil
 122  * \330          - a literal byte follows in the code stream, to be added
 123  *                 to the condition code value of the instruction.
 124  * \331          - instruction not valid with REP prefix.  Hint for
 125  *                 disassembler only; for SSE instructions.
 126  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 127  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 128  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 129  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  130  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  131  * \337          - force a REPNE prefix (0xF2) even if not specified.
 132  *                 \336-\337 are still listed as prefixes in the disassembler.
 133  * \340          - reserve <operand 0> bytes of uninitialized storage.
 134  *                 Operand 0 had better be a segmentless constant.
 135  * \341          - this instruction needs a WAIT "prefix"
 136  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 137  *                 (POP is never used for CS) depending on operand 0
 138  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 139  *                 on operand 0
 140  * \360          - no SSE prefix (== \364\331)
 141  * \361          - 66 SSE prefix (== \366\331)
 142  * \362          - F2 SSE prefix (== \364\332)
 143  * \363          - F3 SSE prefix (== \364\333)
 144  * \364          - operand-size prefix (0x66) not permitted
 145  * \365          - address-size prefix (0x67) not permitted
 146  * \366          - operand-size prefix (0x66) used as opcode extension
 147  * \367          - address-size prefix (0x67) used as opcode extension
 148  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 149  *                 370 is used for Jcc, 371 is used for JMP.
 150  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 151  *                 used for conditional jump over longer jump
 152  * \374          - this instruction takes an XMM VSIB memory EA
 153  * \375          - this instruction takes an YMM VSIB memory EA
 154  */
 155
 156 #include "compiler.h"
 157
 158 #include <stdio.h>
 159 #include <string.h>
 160 #include <inttypes.h>
 161
 162 #include "nasm.h"
 163 #include "nasmlib.h"
 164 #include "assemble.h"
 165 #include "insns.h"
 166 #include "tables.h"
 167
/*
 * Result of matching an instruction against the instruction templates.
 * Every MERR_* (failure) value is declared before, and therefore compares
 * lower than, every MOK_* (success) value.  assemble() turns the MERR_*
 * values into the diagnostics quoted below.
 */
enum match_result {
    /*
     * Matching errors.  These should be sorted so that more specific
     * errors come later in the sequence.
     */
    MERR_INVALOP,           /* "invalid combination of opcode and operands" */
    MERR_OPSIZEMISSING,     /* "operation size not specified" */
    MERR_OPSIZEMISMATCH,    /* "mismatch in operand sizes" */
    MERR_BADCPU,            /* "no instruction for this cpu level" */
    MERR_BADMODE,           /* "instruction not supported in %d-bit mode" */
    /*
     * Matching success; the conditional ones first
     */
    MOK_JUMP,   /* Matching OK but needs jmp_match() */
    MOK_GOOD    /* Matching unconditionally OK */
};
 184
/*
 * Description of an encoded effective address.  process_ea() takes a
 * pointer to one of these; presumably it is the routine that fills the
 * structure in (its definition is not visible here -- TODO confirm).
 */
typedef struct {
    enum ea_type type;            /* what kind of EA is this? */
    int sib_present;              /* is a SIB byte necessary? */
    int bytes;                    /* # of bytes of offset needed */
    int size;                     /* lazy - this is sib+bytes+1 */
    uint8_t modrm, sib, rex, rip; /* the bytes themselves */
} ea;
 192
/*
 * Pack a SIB byte: scale in bits 7..6, index in bits 5..3, base in
 * bits 2..0.  The arguments are NOT masked, so the caller must pass
 * values that already fit their fields.
 */
#define GEN_SIB(scale, index, base)                 \
        (((scale) << 6) | ((index) << 3) | ((base)))

/*
 * Pack a ModRM byte: mod in bits 7..6, reg in bits 5..3, rm in bits 2..0.
 * reg and rm are masked to three bits; mod is not masked.
 */
#define GEN_MODRM(mod, reg, rm)                     \
        (((mod) << 6) | (((reg) & 7) << 3) | ((rm) & 7))
 198
/*
 * File-scope state.  Both assemble() and insn_size() store their `cp'
 * and `error' arguments into cpu and errfunc on entry; only assemble()
 * stores outfmt and list.
 */
static uint32_t cpu;            /* cpu level received from nasm.c */
static efunc errfunc;           /* error callback of the current call */
static struct ofmt *outfmt;     /* output format driver given to assemble() */
static ListGen *list;           /* listing generator given to assemble() */

/*
 * Forward declarations of static helpers defined elsewhere in this file.
 */
static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
static void gencode(int32_t segment, int64_t offset, int bits,
                    insn * ins, const struct itemplate *temp,
                    int64_t insn_end);
static enum match_result find_match(const struct itemplate **tempp,
                                    insn *instruction,
                                    int32_t segment, int64_t offset, int bits);
static enum match_result matches(const struct itemplate *, insn *, int bits);
static opflags_t regflag(const operand *);
static int32_t regval(const operand *);
static int rexflags(int, opflags_t, int);
static int op_rexflags(const operand *, int);
static void add_asp(insn *, int);

static enum ea_type process_ea(operand *, ea *, int, int, int, opflags_t);
 219
 220 static int has_prefix(insn * ins, enum prefix_pos pos, int prefix)
 221 {
 222     return ins->prefixes[pos] == prefix;
 223 }
 224
 225 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 226 {
 227     if (ins->prefixes[pos])
 228         errfunc(ERR_NONFATAL, "invalid %s prefix",
 229                 prefix_name(ins->prefixes[pos]));
 230 }
 231
 232 static const char *size_name(int size)
 233 {
 234     switch (size) {
 235     case 1:
 236         return "byte";
 237     case 2:
 238         return "word";
 239     case 4:
 240         return "dword";
 241     case 8:
 242         return "qword";
 243     case 10:
 244         return "tword";
 245     case 16:
 246         return "oword";
 247     case 32:
 248         return "yword";
 249     default:
 250         return "???";
 251     }
 252 }
 253
 254 static void warn_overflow(int pass, int size)
 255 {
 256     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 257             "%s data exceeds bounds", size_name(size));
 258 }
 259
 260 static void warn_overflow_const(int64_t data, int size)
 261 {
 262     if (overflow_general(data, size))
 263         warn_overflow(ERR_PASS1, size);
 264 }
 265
 266 static void warn_overflow_opd(const struct operand *o, int size)
 267 {
 268     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 269         if (overflow_general(o->offset, size))
 270             warn_overflow(ERR_PASS2, size);
 271     }
 272 }
 273
 274 /*
 275  * This routine wrappers the real output format's output routine,
 276  * in order to pass a copy of the data off to the listing file
 277  * generator at the same time.
 278  */
 279 static void out(int64_t offset, int32_t segto, const void *data,
 280                 enum out_type type, uint64_t size,
 281                 int32_t segment, int32_t wrt)
 282 {
 283     static int32_t lineno = 0;     /* static!!! */
 284     static char *lnfname = NULL;
 285     uint8_t p[8];
 286
 287     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 288         /*
 289          * This is a non-relocated address, and we're going to
 290          * convert it into RAWDATA format.
 291          */
 292         uint8_t *q = p;
 293
 294         if (size > 8) {
 295             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 296             return;
 297         }
 298
 299         WRITEADDR(q, *(int64_t *)data, size);
 300         data = p;
 301         type = OUT_RAWDATA;
 302     }
 303
 304     list->output(offset, data, type, size);
 305
 306     /*
 307      * this call to src_get determines when we call the
 308      * debug-format-specific "linenum" function
 309      * it updates lineno and lnfname to the current values
 310      * returning 0 if "same as last time", -2 if lnfname
 311      * changed, and the amount by which lineno changed,
 312      * if it did. thus, these variables must be static
 313      */
 314
 315     if (src_get(&lineno, &lnfname))
 316         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 317
 318     outfmt->output(segto, data, type, size, segment, wrt);
 319 }
 320
 321 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 322                      insn * ins, const uint8_t *code)
 323 {
 324     int64_t isize;
 325     uint8_t c = code[0];
 326
 327     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 328         return false;
 329     if (!optimizing)
 330         return false;
 331     if (optimizing < 0 && c == 0371)
 332         return false;
 333
 334     isize = calcsize(segment, offset, bits, ins, code);
 335
 336     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 337         /* Be optimistic in pass 1 */
 338         return true;
 339
 340     if (ins->oprs[0].segment != segment)
 341         return false;
 342
 343     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 344     return (isize >= -128 && isize <= 127); /* is it byte size? */
 345 }
 346
 347 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 348                  insn * instruction, struct ofmt *output, efunc error,
 349                  ListGen * listgen)
 350 {
 351     const struct itemplate *temp;
 352     int j;
 353     enum match_result m;
 354     int64_t insn_end;
 355     int32_t itimes;
 356     int64_t start = offset;
 357     int64_t wsize;              /* size for DB etc. */
 358
 359     errfunc = error;            /* to pass to other functions */
 360     cpu = cp;
 361     outfmt = output;            /* likewise */
 362     list = listgen;             /* and again */
 363
 364     wsize = idata_bytes(instruction->opcode);
 365     if (wsize == -1)
 366         return 0;
 367
 368     if (wsize) {
 369         extop *e;
 370         int32_t t = instruction->times;
 371         if (t < 0)
 372             errfunc(ERR_PANIC,
 373                     "instruction->times < 0 (%ld) in assemble()", t);
 374
 375         while (t--) {           /* repeat TIMES times */
 376             list_for_each(e, instruction->eops) {
 377                 if (e->type == EOT_DB_NUMBER) {
 378                     if (wsize > 8) {
 379                         errfunc(ERR_NONFATAL,
 380                                 "integer supplied to a DT, DO or DY"
 381                                 " instruction");
 382                     } else {
 383                         out(offset, segment, &e->offset,
 384                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 385                         offset += wsize;
 386                     }
 387                 } else if (e->type == EOT_DB_STRING ||
 388                            e->type == EOT_DB_STRING_FREE) {
 389                     int align;
 390
 391                     out(offset, segment, e->stringval,
 392                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 393                     align = e->stringlen % wsize;
 394
 395                     if (align) {
 396                         align = wsize - align;
 397                         out(offset, segment, zero_buffer,
 398                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 399                     }
 400                     offset += e->stringlen + align;
 401                 }
 402             }
 403             if (t > 0 && t == instruction->times - 1) {
 404                 /*
 405                  * Dummy call to list->output to give the offset to the
 406                  * listing module.
 407                  */
 408                 list->output(offset, NULL, OUT_RAWDATA, 0);
 409                 list->uplevel(LIST_TIMES);
 410             }
 411         }
 412         if (instruction->times > 1)
 413             list->downlevel(LIST_TIMES);
 414         return offset - start;
 415     }
 416
 417     if (instruction->opcode == I_INCBIN) {
 418         const char *fname = instruction->eops->stringval;
 419         FILE *fp;
 420
 421         fp = fopen(fname, "rb");
 422         if (!fp) {
 423             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 424                   fname);
 425         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 426             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 427                   fname);
 428         } else {
 429             static char buf[4096];
 430             size_t t = instruction->times;
 431             size_t base = 0;
 432             size_t len;
 433
 434             len = ftell(fp);
 435             if (instruction->eops->next) {
 436                 base = instruction->eops->next->offset;
 437                 len -= base;
 438                 if (instruction->eops->next->next &&
 439                     len > (size_t)instruction->eops->next->next->offset)
 440                     len = (size_t)instruction->eops->next->next->offset;
 441             }
 442             /*
 443              * Dummy call to list->output to give the offset to the
 444              * listing module.
 445              */
 446             list->output(offset, NULL, OUT_RAWDATA, 0);
 447             list->uplevel(LIST_INCBIN);
 448             while (t--) {
 449                 size_t l;
 450
 451                 fseek(fp, base, SEEK_SET);
 452                 l = len;
 453                 while (l > 0) {
 454                     int32_t m;
 455                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 456                     if (!m) {
 457                         /*
 458                          * This shouldn't happen unless the file
 459                          * actually changes while we are reading
 460                          * it.
 461                          */
 462                         error(ERR_NONFATAL,
 463                               "`incbin': unexpected EOF while"
 464                               " reading file `%s'", fname);
 465                         t = 0;  /* Try to exit cleanly */
 466                         break;
 467                     }
 468                     out(offset, segment, buf, OUT_RAWDATA, m,
 469                         NO_SEG, NO_SEG);
 470                     l -= m;
 471                 }
 472             }
 473             list->downlevel(LIST_INCBIN);
 474             if (instruction->times > 1) {
 475                 /*
 476                  * Dummy call to list->output to give the offset to the
 477                  * listing module.
 478                  */
 479                 list->output(offset, NULL, OUT_RAWDATA, 0);
 480                 list->uplevel(LIST_TIMES);
 481                 list->downlevel(LIST_TIMES);
 482             }
 483             fclose(fp);
 484             return instruction->times * len;
 485         }
 486         return 0;               /* if we're here, there's an error */
 487     }
 488
 489     /* Check to see if we need an address-size prefix */
 490     add_asp(instruction, bits);
 491
 492     m = find_match(&temp, instruction, segment, offset, bits);
 493
 494     if (m == MOK_GOOD) {
 495         /* Matches! */
 496         int64_t insn_size = calcsize(segment, offset, bits,
 497                                      instruction, temp->code);
 498         itimes = instruction->times;
 499         if (insn_size < 0)  /* shouldn't be, on pass two */
 500             error(ERR_PANIC, "errors made it through from pass one");
 501         else
 502             while (itimes--) {
 503                 for (j = 0; j < MAXPREFIX; j++) {
 504                     uint8_t c = 0;
 505                     switch (instruction->prefixes[j]) {
 506                     case P_WAIT:
 507                         c = 0x9B;
 508                         break;
 509                     case P_LOCK:
 510                         c = 0xF0;
 511                         break;
 512                     case P_REPNE:
 513                     case P_REPNZ:
 514                     case P_XACQUIRE:
 515                         c = 0xF2;
 516                         break;
 517                     case P_REPE:
 518                     case P_REPZ:
 519                     case P_REP:
 520                     case P_XRELEASE:
 521                         c = 0xF3;
 522                         break;
 523                     case R_CS:
 524                         if (bits == 64) {
 525                             error(ERR_WARNING | ERR_PASS2,
 526                                   "cs segment base generated, but will be ignored in 64-bit mode");
 527                         }
 528                         c = 0x2E;
 529                         break;
 530                     case R_DS:
 531                         if (bits == 64) {
 532                             error(ERR_WARNING | ERR_PASS2,
 533                                   "ds segment base generated, but will be ignored in 64-bit mode");
 534                         }
 535                         c = 0x3E;
 536                         break;
 537                     case R_ES:
 538                         if (bits == 64) {
 539                             error(ERR_WARNING | ERR_PASS2,
 540                                   "es segment base generated, but will be ignored in 64-bit mode");
 541                         }
 542                         c = 0x26;
 543                         break;
 544                     case R_FS:
 545                         c = 0x64;
 546                         break;
 547                     case R_GS:
 548                         c = 0x65;
 549                         break;
 550                     case R_SS:
 551                         if (bits == 64) {
 552                             error(ERR_WARNING | ERR_PASS2,
 553                                   "ss segment base generated, but will be ignored in 64-bit mode");
 554                         }
 555                         c = 0x36;
 556                         break;
 557                     case R_SEGR6:
 558                     case R_SEGR7:
 559                         error(ERR_NONFATAL,
 560                               "segr6 and segr7 cannot be used as prefixes");
 561                         break;
 562                     case P_A16:
 563                         if (bits == 64) {
 564                             error(ERR_NONFATAL,
 565                                   "16-bit addressing is not supported "
 566                                   "in 64-bit mode");
 567                         } else if (bits != 16)
 568                             c = 0x67;
 569                         break;
 570                     case P_A32:
 571                         if (bits != 32)
 572                             c = 0x67;
 573                         break;
 574                     case P_A64:
 575                         if (bits != 64) {
 576                             error(ERR_NONFATAL,
 577                                   "64-bit addressing is only supported "
 578                                   "in 64-bit mode");
 579                         }
 580                         break;
 581                     case P_ASP:
 582                         c = 0x67;
 583                         break;
 584                     case P_O16:
 585                         if (bits != 16)
 586                             c = 0x66;
 587                         break;
 588                     case P_O32:
 589                         if (bits == 16)
 590                             c = 0x66;
 591                         break;
 592                     case P_O64:
 593                         /* REX.W */
 594                         break;
 595                     case P_OSP:
 596                         c = 0x66;
 597                         break;
 598                     case P_none:
 599                         break;
 600                     default:
 601                         error(ERR_PANIC, "invalid instruction prefix");
 602                     }
 603                     if (c != 0) {
 604                         out(offset, segment, &c, OUT_RAWDATA, 1,
 605                             NO_SEG, NO_SEG);
 606                         offset++;
 607                     }
 608                 }
 609                 insn_end = offset + insn_size;
 610                 gencode(segment, offset, bits, instruction,
 611                         temp, insn_end);
 612                 offset += insn_size;
 613                 if (itimes > 0 && itimes == instruction->times - 1) {
 614                     /*
 615                      * Dummy call to list->output to give the offset to the
 616                      * listing module.
 617                      */
 618                     list->output(offset, NULL, OUT_RAWDATA, 0);
 619                     list->uplevel(LIST_TIMES);
 620                 }
 621             }
 622         if (instruction->times > 1)
 623             list->downlevel(LIST_TIMES);
 624         return offset - start;
 625     } else {
 626         /* No match */
 627         switch (m) {
 628         case MERR_OPSIZEMISSING:
 629             error(ERR_NONFATAL, "operation size not specified");
 630             break;
 631         case MERR_OPSIZEMISMATCH:
 632             error(ERR_NONFATAL, "mismatch in operand sizes");
 633             break;
 634         case MERR_BADCPU:
 635             error(ERR_NONFATAL, "no instruction for this cpu level");
 636             break;
 637         case MERR_BADMODE:
 638             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 639                   bits);
 640             break;
 641         default:
 642             error(ERR_NONFATAL,
 643                   "invalid combination of opcode and operands");
 644             break;
 645         }
 646     }
 647     return 0;
 648 }
 649
 650 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 651                   insn * instruction, efunc error)
 652 {
 653     const struct itemplate *temp;
 654     enum match_result m;
 655
 656     errfunc = error;            /* to pass to other functions */
 657     cpu = cp;
 658
 659     if (instruction->opcode == I_none)
 660         return 0;
 661
 662     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 663         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 664         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 665         instruction->opcode == I_DY) {
 666         extop *e;
 667         int32_t isize, osize, wsize;
 668
 669         isize = 0;
 670         wsize = idata_bytes(instruction->opcode);
 671
 672         list_for_each(e, instruction->eops) {
 673             int32_t align;
 674
 675             osize = 0;
 676             if (e->type == EOT_DB_NUMBER) {
 677                 osize = 1;
 678                 warn_overflow_const(e->offset, wsize);
 679             } else if (e->type == EOT_DB_STRING ||
 680                        e->type == EOT_DB_STRING_FREE)
 681                 osize = e->stringlen;
 682
 683             align = (-osize) % wsize;
 684             if (align < 0)
 685                 align += wsize;
 686             isize += osize + align;
 687         }
 688         return isize * instruction->times;
 689     }
 690
 691     if (instruction->opcode == I_INCBIN) {
 692         const char *fname = instruction->eops->stringval;
 693         FILE *fp;
 694         int64_t val = 0;
 695         size_t len;
 696
 697         fp = fopen(fname, "rb");
 698         if (!fp)
 699             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 700                   fname);
 701         else if (fseek(fp, 0L, SEEK_END) < 0)
 702             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 703                   fname);
 704         else {
 705             len = ftell(fp);
 706             if (instruction->eops->next) {
 707                 len -= instruction->eops->next->offset;
 708                 if (instruction->eops->next->next &&
 709                     len > (size_t)instruction->eops->next->next->offset) {
 710                     len = (size_t)instruction->eops->next->next->offset;
 711                 }
 712             }
 713             val = instruction->times * len;
 714         }
 715         if (fp)
 716             fclose(fp);
 717         return val;
 718     }
 719
 720     /* Check to see if we need an address-size prefix */
 721     add_asp(instruction, bits);
 722
 723     m = find_match(&temp, instruction, segment, offset, bits);
 724     if (m == MOK_GOOD) {
 725         /* we've matched an instruction. */
 726         int64_t isize;
 727         const uint8_t *codes = temp->code;
 728         int j;
 729
 730         isize = calcsize(segment, offset, bits, instruction, codes);
 731         if (isize < 0)
 732             return -1;
 733         for (j = 0; j < MAXPREFIX; j++) {
 734             switch (instruction->prefixes[j]) {
 735             case P_A16:
 736                 if (bits != 16)
 737                     isize++;
 738                 break;
 739             case P_A32:
 740                 if (bits != 32)
 741                     isize++;
 742                 break;
 743             case P_O16:
 744                 if (bits != 16)
 745                     isize++;
 746                 break;
 747             case P_O32:
 748                 if (bits == 16)
 749                     isize++;
 750                 break;
 751             case P_A64:
 752             case P_O64:
 753             case P_none:
 754                 break;
 755             default:
 756                 isize++;
 757                 break;
 758             }
 759         }
 760         return isize * instruction->times;
 761     } else {
 762         return -1;                  /* didn't match any instruction */
 763     }
 764 }
 765
 766 static bool possible_sbyte(operand *o)
 767 {
 768     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 769         !(o->opflags & OPFLAG_UNKNOWN) &&
 770         optimizing >= 0 && !(o->type & STRICT);
 771 }
 772
 773 /* check that opn[op]  is a signed byte of size 16 or 32 */
 774 static bool is_sbyte16(operand *o)
 775 {
 776     int16_t v;
 777
 778     if (!possible_sbyte(o))
 779         return false;
 780
 781     v = o->offset;
 782     return v >= -128 && v <= 127;
 783 }
 784
 785 static bool is_sbyte32(operand *o)
 786 {
 787     int32_t v;
 788
 789     if (!possible_sbyte(o))
 790         return false;
 791
 792     v = o->offset;
 793     return v >= -128 && v <= 127;
 794 }
 795
 796 static void bad_hle_warn(const insn * ins, uint8_t hleok)
 797 {
 798     enum prefixes rep_pfx = ins->prefixes[PPS_REP];
 799     enum whatwarn { w_none, w_lock, w_inval };
 800     static const enum whatwarn warn[2][4] =
 801     {
 802         { w_inval, w_inval, w_none, w_lock }, /* XACQUIRE */
 803         { w_inval, w_none,  w_none, w_lock }, /* XRELEASE */
 804     };
 805     unsigned int n;
 806
 807     n = (unsigned int)rep_pfx - P_XACQUIRE;
 808     if (n > 1)
 809         return;                 /* Not XACQUIRE/XRELEASE */
 810
 811     switch (warn[n][hleok]) {
 812     case w_none:
 813         break;
 814
 815     case w_lock:
 816         if (ins->prefixes[PPS_LOCK] != P_LOCK) {
 817             errfunc(ERR_WARNING | ERR_PASS2,
 818                     "%s with this instruction requires lock",
 819                     prefix_name(rep_pfx));
 820         }
 821         break;
 822
 823     case w_inval:
 824         errfunc(ERR_WARNING | ERR_PASS2,
 825                 "%s invalid with this instruction",
 826                 prefix_name(rep_pfx));
 827         break;
 828     }
 829 }
 830
/*
 * Common construct: expands to four consecutive case labels,
 * x, x+1, x+2 and x+3, so that `case4(010):' covers \10..\13.
 * The trailing colon is supplied at the point of use.
 */
#define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 833
/*
 * Compute the encoded length of an instruction by interpreting the
 * byte-code string of its template (the codes are listed in the comment
 * at the top of this file).
 *
 *   segment, offset - not used by this function (explicitly voided)
 *   bits            - current mode; compared against 16, 32 and 64
 *   ins             - the instruction.  It is also modified:
 *                       * ins->rex is rebuilt from scratch,
 *                       * ins->vexreg, ins->vex_cm and ins->vex_wlp are
 *                         filled in by the \260..\263 and \270 codes,
 *                       * some codes install a default prefix in an
 *                         empty slot of ins->prefixes[].
 *   codes           - zero-terminated byte-code string
 *
 * Returns the accumulated length in bytes, or -1 when the instruction
 * cannot be encoded (several, but not all, of those paths report an
 * error through errfunc() first).  No length is added here for the
 * entries of ins->prefixes[].
 */
static int64_t calcsize(int32_t segment, int64_t offset, int bits,
                        insn * ins, const uint8_t *codes)
{
    int64_t length = 0;
    uint8_t c;
    int rex_mask = ~0;          /* REX bits that survive the final masking */
    int op1, op2;
    struct operand *opx;
    uint8_t opex = 0;           /* operand-index extension set by \5..\7 */
    enum ea_type eat;
    uint8_t hleok = 0;          /* handed to bad_hle_warn() at the end */

    ins->rex = 0;               /* Ensure REX is reset */
    eat = EA_SCALAR;            /* Expect a scalar EA */

    if (ins->prefixes[PPS_OSIZE] == P_O64)
        ins->rex |= REX_W;

    (void)segment;              /* Don't warn that this parameter is unused */
    (void)offset;               /* Don't warn that this parameter is unused */

    while (*codes) {
        c = *codes++;
        /*
         * The low two bits of the code select operand op1 and bits 3-4
         * select op2; a preceding \5..\7 code adds 4 to either index.
         */
        op1 = (c & 3) + ((opex & 1) << 2);
        op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
        opx = &ins->oprs[op1];
        opex = 0;               /* For the next iteration */

        switch (c) {
        case 01:
        case 02:
        case 03:
        case 04:
            /* c literal bytes follow in the code string; skip and count them */
            codes += c, length += c;
            break;

        case 05:
        case 06:
        case 07:
            /* Operand-index extension for the next code only */
            opex = c;
            break;

        case4(010):
            /* One byte from the code string; the operand may need REX bits */
            ins->rex |=
                op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
            codes++, length++;
            break;

        case4(014):
        case4(020):
        case4(024):
            length++;
            break;

        case4(030):
            length += 2;
            break;

        case4(034):
            /* 2 or 4 bytes: explicit operand size wins, else the mode */
            if (opx->type & (BITS16 | BITS32 | BITS64))
                length += (opx->type & BITS16) ? 2 : 4;
            else
                length += (bits == 16) ? 2 : 4;
            break;

        case4(040):
            length += 4;
            break;

        case4(044):
            /* addr_size is in bits; convert to bytes */
            length += ins->addr_size >> 3;
            break;

        case4(050):
            length++;
            break;

        case4(054):
            length += 8; /* MOV reg64/imm */
            break;

        case4(060):
            length += 2;
            break;

        case4(064):
            /* 2 or 4 bytes: explicit operand size wins, else the mode */
            if (opx->type & (BITS16 | BITS32 | BITS64))
                length += (opx->type & BITS16) ? 2 : 4;
            else
                length += (bits == 16) ? 2 : 4;
            break;

        case4(070):
            length += 4;
            break;

        case4(074):
            length += 2;
            break;

        case4(0140):
            /* Short (1 byte) form if the value fits a signed byte */
            length += is_sbyte16(opx) ? 1 : 2;
            break;

        case4(0144):
            /* One byte from the code string */
            codes++;
            length++;
            break;

        case4(0150):
            /* Short (1 byte) form if the value fits a signed byte */
            length += is_sbyte32(opx) ? 1 : 4;
            break;

        case4(0154):
            /* One byte from the code string */
            codes++;
            length++;
            break;

        case 0172:
        case 0173:
            /* One argument byte in the code string, one byte of output */
            codes++;
            length++;
            break;

        case4(0174):
            length++;
            break;

        case 0240:
            /* XACQUIRE/XRELEASE make this template unusable */
            if (has_prefix(ins, PPS_REP, P_XACQUIRE) ||
                has_prefix(ins, PPS_REP, P_XRELEASE))
                return -1;
            break;

        case 0241:
        case 0242:
        case 0243:
            /* Remember the column used by bad_hle_warn() */
            hleok = c & 3;
            break;

        case4(0250):
            /* Short (1 byte) form if the value fits a signed byte */
            length += is_sbyte32(opx) ? 1 : 4;
            break;

        case4(0254):
            length += 4;
            break;

        case4(0260):
            /*
             * VEX-style prefix with a register operand: two parameter
             * bytes follow in the code string.  The prefix length itself
             * is added after the loop.
             */
            ins->rex |= REX_V;
            ins->vexreg = regval(opx);
            ins->vex_cm = *codes++;
            ins->vex_wlp = *codes++;
            break;

        case 0270:
            /* As \260..\263, but with the register field set to 0 */
            ins->rex |= REX_V;
            ins->vexreg = 0;
            ins->vex_cm = *codes++;
            ins->vex_wlp = *codes++;
            break;

        case4(0274):
            length++;
            break;

        case4(0300):
            break;

        case 0310:
            /*
             * Not usable in 64-bit mode; otherwise costs one byte unless
             * the mode is 16 or an explicit A16 prefix is already present.
             */
            if (bits == 64)
                return -1;
            length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
            break;

        case 0311:
            /* One byte unless the mode is 32 or A32 is already present */
            length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
            break;

        case 0312:
            break;

        case 0313:
            /* Only valid in 64-bit mode without an A16/A32 prefix */
            if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
                has_prefix(ins, PPS_ASIZE, P_A32))
                return -1;
            break;

        case4(0314):
            break;

        case 0320:
        {
            /* Default the operand-size slot to O16 if it is empty */
            enum prefixes pfx = ins->prefixes[PPS_OSIZE];
            if (pfx == P_O16)
                break;
            if (pfx != P_none)
                errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
            else
                ins->prefixes[PPS_OSIZE] = P_O16;
            break;
        }

        case 0321:
        {
            /* Default the operand-size slot to O32 if it is empty */
            enum prefixes pfx = ins->prefixes[PPS_OSIZE];
            if (pfx == P_O32)
                break;
            if (pfx != P_none)
                errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
            else
                ins->prefixes[PPS_OSIZE] = P_O32;
            break;
        }

        case 0322:
            break;

        case 0323:
            /* REX.W will be stripped when rex_mask is applied below */
            rex_mask &= ~REX_W;
            break;

        case 0324:
            ins->rex |= REX_W;
            break;

        case 0325:
            /* High byte registers are not allowed; checked after the loop */
            ins->rex |= REX_NH;
            break;

        case 0330:
            /* One byte from the code string */
            codes++, length++;
            break;

        case 0331:
            break;

        case 0332:
        case 0333:
            length++;
            break;

        case 0334:
            ins->rex |= REX_L;
            break;

        case 0335:
            break;

        case 0336:
            /* Default the REP slot to REP if it is empty */
            if (!ins->prefixes[PPS_REP])
                ins->prefixes[PPS_REP] = P_REP;
            break;

        case 0337:
            /* Default the REP slot to REPNE if it is empty */
            if (!ins->prefixes[PPS_REP])
                ins->prefixes[PPS_REP] = P_REPNE;
            break;

        case 0340:
            /* Reserved space: the size is the constant value of operand 0 */
            if (ins->oprs[0].segment != NO_SEG)
                errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
                        " quantity of BSS space");
            else
                length += ins->oprs[0].offset;
            break;

        case 0341:
            /* Default the WAIT slot to WAIT if it is empty */
            if (!ins->prefixes[PPS_WAIT])
                ins->prefixes[PPS_WAIT] = P_WAIT;
            break;

        case4(0344):
            length++;
            break;

        case 0360:
            break;

        case 0361:
        case 0362:
        case 0363:
            length++;
            break;

        case 0364:
        case 0365:
            break;

        case 0366:
        case 0367:
            length++;
            break;

        case 0370:
        case 0371:
        case 0372:
            break;

        case 0373:
            length++;
            break;

        case 0374:
            /* The effective address that follows must be of this type */
            eat = EA_XMMVSIB;
            break;

        case 0375:
            /* The effective address that follows must be of this type */
            eat = EA_YMMVSIB;
            break;

        case4(0100):
        case4(0110):
        case4(0120):
        case4(0130):
        case4(0200):
        case4(0204):
        case4(0210):
        case4(0214):
        case4(0220):
        case4(0224):
        case4(0230):
        case4(0234):
            /* Effective address: operand op2, sized by process_ea() */
            {
                ea ea_data;
                int rfield;
                opflags_t rflags;
                struct operand *opy = &ins->oprs[op2];

                ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */

                if (c <= 0177) {
                    /* pick rfield from operand b (opx) */
                    rflags = regflag(opx);
                    rfield = nasm_regvals[opx->basereg];
                } else {
                    /* rfield is a constant held in the low bits of the code */
                    rflags = 0;
                    rfield = c & 7;
                }
                /* The EA type found must match the expected one (eat) */
                if (process_ea(opy, &ea_data, bits,ins->addr_size,
                               rfield, rflags) != eat) {
                    errfunc(ERR_NONFATAL, "invalid effective address");
                    return -1;
                } else {
                    ins->rex |= ea_data.rex;
                    length += ea_data.size;
                }
            }
            break;

        default:
            errfunc(ERR_PANIC, "internal instruction table corrupt"
                    ": instruction code \\%o (0x%02X) given", c, c);
            break;
        }
    }

    /* Drop any REX bits a byte code asked to suppress (see \323) */
    ins->rex &= rex_mask;

    if (ins->rex & REX_NH) {
        if (ins->rex & REX_H) {
            errfunc(ERR_NONFATAL, "instruction cannot use high registers");
            return -1;
        }
        ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
    }

    if (ins->rex & REX_V) {
        /* REX bits that are not acceptable outside 64-bit mode */
        int bad32 = REX_R|REX_W|REX_X|REX_B;

        if (ins->rex & REX_H) {
            errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
            return -1;
        }
        /* Bits 4-5 of vex_wlp say how the W bit is to be treated */
        switch (ins->vex_wlp & 060) {
        case 000:
        case 040:
            ins->rex &= ~REX_W;
            break;
        case 020:
            ins->rex |= REX_W;
            bad32 &= ~REX_W;
            break;
        case 060:
            /* Follow REX_W */
            break;
        }

        if (bits != 64 && ((ins->rex & bad32) || ins->vexreg > 7)) {
            errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
            return -1;
        }
        /* Three-byte prefix unless vex_cm is 1 and W, X and B are clear */
        if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
            length += 3;
        else
            length += 2;
    } else if (ins->rex & REX_REAL) {
        if (ins->rex & REX_H) {
            errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
            return -1;
        } else if (bits == 64) {
            /* One byte for the REX prefix */
            length++;
        } else if ((ins->rex & REX_L) &&
                   !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
                   cpu >= IF_X86_64) {
            /* LOCK-as-REX.R */
            assert_no_prefix(ins, PPS_LOCK);
            length++;
        } else {
            errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
            return -1;
        }
    }

    bad_hle_warn(ins, hleok);

    return length;
}
1252
/*
 * Emit the pending REX prefix byte, if there is one.
 *
 * Expands in a scope that provides `ins', `bits', `offset' and `segment'
 * (plus out()).  A byte is written only when the instruction is not
 * VEX-encoded (REX_V clear), at least one REX_REAL bit is set and the
 * mode is 64-bit.  After writing, ins->rex is cleared, so any later
 * expansion for the same instruction does nothing, and `offset' is
 * advanced by one.
 *
 * Wrapped in do { } while (0) so that `EMIT_REX();' is exactly one
 * statement and stays safe inside an unbraced if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & REX_V) && (ins->rex & REX_REAL) &&             \
            (bits == 64)) {                                             \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1,             \
                NO_SEG, NO_SEG);                                        \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1260
1261 static void gencode(int32_t segment, int64_t offset, int bits,
1262                     insn * ins, const struct itemplate *temp,
1263                     int64_t insn_end)
1264 {
1265     static const char condval[] = {   /* conditional opcodes */
1266         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1267         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1268         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1269     };
1270     uint8_t c;
1271     uint8_t bytes[4];
1272     int64_t size;
1273     int64_t data;
1274     int op1, op2;
1275     struct operand *opx;
1276     const uint8_t *codes = temp->code;
1277     uint8_t opex = 0;
1278     enum ea_type eat = EA_SCALAR;
1279
1280     while (*codes) {
1281         c = *codes++;
1282         op1 = (c & 3) + ((opex & 1) << 2);
1283         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1284         opx = &ins->oprs[op1];
1285         opex = 0;                /* For the next iteration */
1286
1287         switch (c) {
1288         case 01:
1289         case 02:
1290         case 03:
1291         case 04:
1292             EMIT_REX();
1293             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1294             codes += c;
1295             offset += c;
1296             break;
1297
1298         case 05:
1299         case 06:
1300         case 07:
1301             opex = c;
1302             break;
1303
1304         case4(010):
1305             EMIT_REX();
1306             bytes[0] = *codes++ + (regval(opx) & 7);
1307             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1308             offset += 1;
1309             break;
1310
1311         case4(014):
1312             /*
1313              * The test for BITS8 and SBYTE here is intended to avoid
1314              * warning on optimizer actions due to SBYTE, while still
1315              * warn on explicit BYTE directives.  Also warn, obviously,
1316              * if the optimizer isn't enabled.
1317              */
1318             if (((opx->type & BITS8) ||
1319                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1320                 (opx->offset < -128 || opx->offset > 127)) {
1321                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1322                         "signed byte value exceeds bounds");
1323             }
1324             if (opx->segment != NO_SEG) {
1325                 data = opx->offset;
1326                 out(offset, segment, &data, OUT_ADDRESS, 1,
1327                     opx->segment, opx->wrt);
1328             } else {
1329                 bytes[0] = opx->offset;
1330                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1331                     NO_SEG);
1332             }
1333             offset += 1;
1334             break;
1335
1336         case4(020):
1337             if (opx->offset < -256 || opx->offset > 255) {
1338                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1339                         "byte value exceeds bounds");
1340             }
1341             if (opx->segment != NO_SEG) {
1342                 data = opx->offset;
1343                 out(offset, segment, &data, OUT_ADDRESS, 1,
1344                     opx->segment, opx->wrt);
1345             } else {
1346                 bytes[0] = opx->offset;
1347                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1348                     NO_SEG);
1349             }
1350             offset += 1;
1351             break;
1352
1353         case4(024):
1354             if (opx->offset < 0 || opx->offset > 255)
1355                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1356                         "unsigned byte value exceeds bounds");
1357             if (opx->segment != NO_SEG) {
1358                 data = opx->offset;
1359                 out(offset, segment, &data, OUT_ADDRESS, 1,
1360                     opx->segment, opx->wrt);
1361             } else {
1362                 bytes[0] = opx->offset;
1363                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1364                     NO_SEG);
1365             }
1366             offset += 1;
1367             break;
1368
1369         case4(030):
1370             warn_overflow_opd(opx, 2);
1371             data = opx->offset;
1372             out(offset, segment, &data, OUT_ADDRESS, 2,
1373                 opx->segment, opx->wrt);
1374             offset += 2;
1375             break;
1376
1377         case4(034):
1378             if (opx->type & (BITS16 | BITS32))
1379                 size = (opx->type & BITS16) ? 2 : 4;
1380             else
1381                 size = (bits == 16) ? 2 : 4;
1382             warn_overflow_opd(opx, size);
1383             data = opx->offset;
1384             out(offset, segment, &data, OUT_ADDRESS, size,
1385                 opx->segment, opx->wrt);
1386             offset += size;
1387             break;
1388
1389         case4(040):
1390             warn_overflow_opd(opx, 4);
1391             data = opx->offset;
1392             out(offset, segment, &data, OUT_ADDRESS, 4,
1393                 opx->segment, opx->wrt);
1394             offset += 4;
1395             break;
1396
1397         case4(044):
1398             data = opx->offset;
1399             size = ins->addr_size >> 3;
1400             warn_overflow_opd(opx, size);
1401             out(offset, segment, &data, OUT_ADDRESS, size,
1402                 opx->segment, opx->wrt);
1403             offset += size;
1404             break;
1405
1406         case4(050):
1407             if (opx->segment != segment) {
1408                 data = opx->offset;
1409                 out(offset, segment, &data,
1410                     OUT_REL1ADR, insn_end - offset,
1411                     opx->segment, opx->wrt);
1412             } else {
1413                 data = opx->offset - insn_end;
1414                 if (data > 127 || data < -128)
1415                     errfunc(ERR_NONFATAL, "short jump is out of range");
1416                 out(offset, segment, &data,
1417                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1418             }
1419             offset += 1;
1420             break;
1421
1422         case4(054):
1423             data = (int64_t)opx->offset;
1424             out(offset, segment, &data, OUT_ADDRESS, 8,
1425                 opx->segment, opx->wrt);
1426             offset += 8;
1427             break;
1428
1429         case4(060):
1430             if (opx->segment != segment) {
1431                 data = opx->offset;
1432                 out(offset, segment, &data,
1433                     OUT_REL2ADR, insn_end - offset,
1434                     opx->segment, opx->wrt);
1435             } else {
1436                 data = opx->offset - insn_end;
1437                 out(offset, segment, &data,
1438                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1439             }
1440             offset += 2;
1441             break;
1442
1443         case4(064):
1444             if (opx->type & (BITS16 | BITS32 | BITS64))
1445                 size = (opx->type & BITS16) ? 2 : 4;
1446             else
1447                 size = (bits == 16) ? 2 : 4;
1448             if (opx->segment != segment) {
1449                 data = opx->offset;
1450                 out(offset, segment, &data,
1451                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1452                     insn_end - offset, opx->segment, opx->wrt);
1453             } else {
1454                 data = opx->offset - insn_end;
1455                 out(offset, segment, &data,
1456                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1457             }
1458             offset += size;
1459             break;
1460
1461         case4(070):
1462             if (opx->segment != segment) {
1463                 data = opx->offset;
1464                 out(offset, segment, &data,
1465                     OUT_REL4ADR, insn_end - offset,
1466                     opx->segment, opx->wrt);
1467             } else {
1468                 data = opx->offset - insn_end;
1469                 out(offset, segment, &data,
1470                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1471             }
1472             offset += 4;
1473             break;
1474
1475         case4(074):
1476             if (opx->segment == NO_SEG)
1477                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1478                         " relocatable");
1479             data = 0;
1480             out(offset, segment, &data, OUT_ADDRESS, 2,
1481                 outfmt->segbase(1 + opx->segment),
1482                 opx->wrt);
1483             offset += 2;
1484             break;
1485
1486         case4(0140):
1487             data = opx->offset;
1488             warn_overflow_opd(opx, 2);
1489             if (is_sbyte16(opx)) {
1490                 bytes[0] = data;
1491                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1492                     NO_SEG);
1493                 offset++;
1494             } else {
1495                 out(offset, segment, &data, OUT_ADDRESS, 2,
1496                     opx->segment, opx->wrt);
1497                 offset += 2;
1498             }
1499             break;
1500
1501         case4(0144):
1502             EMIT_REX();
1503             bytes[0] = *codes++;
1504             if (is_sbyte16(opx))
1505                 bytes[0] |= 2;  /* s-bit */
1506             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1507             offset++;
1508             break;
1509
1510         case4(0150):
1511             data = opx->offset;
1512             warn_overflow_opd(opx, 4);
1513             if (is_sbyte32(opx)) {
1514                 bytes[0] = data;
1515                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1516                     NO_SEG);
1517                 offset++;
1518             } else {
1519                 out(offset, segment, &data, OUT_ADDRESS, 4,
1520                     opx->segment, opx->wrt);
1521                 offset += 4;
1522             }
1523             break;
1524
1525         case4(0154):
1526             EMIT_REX();
1527             bytes[0] = *codes++;
1528             if (is_sbyte32(opx))
1529                 bytes[0] |= 2;  /* s-bit */
1530             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1531             offset++;
1532             break;
1533
1534         case 0172:
1535             c = *codes++;
1536             opx = &ins->oprs[c >> 3];
1537             bytes[0] = nasm_regvals[opx->basereg] << 4;
1538             opx = &ins->oprs[c & 7];
1539             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1540                 errfunc(ERR_NONFATAL,
1541                         "non-absolute expression not permitted as argument %d",
1542                         c & 7);
1543             } else {
1544                 if (opx->offset & ~15) {
1545                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1546                             "four-bit argument exceeds bounds");
1547                 }
1548                 bytes[0] |= opx->offset & 15;
1549             }
1550             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1551             offset++;
1552             break;
1553
1554         case 0173:
1555             c = *codes++;
1556             opx = &ins->oprs[c >> 4];
1557             bytes[0] = nasm_regvals[opx->basereg] << 4;
1558             bytes[0] |= c & 15;
1559             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1560             offset++;
1561             break;
1562
1563         case4(0174):
1564             bytes[0] = nasm_regvals[opx->basereg] << 4;
1565             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1566             offset++;
1567             break;
1568
1569         case4(0240):
1570             break;
1571
1572         case4(0250):
1573             data = opx->offset;
1574             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1575                 (int32_t)data != (int64_t)data) {
1576                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1577                         "signed dword immediate exceeds bounds");
1578             }
1579             if (is_sbyte32(opx)) {
1580                 bytes[0] = data;
1581                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1582                     NO_SEG);
1583                 offset++;
1584             } else {
1585                 out(offset, segment, &data, OUT_ADDRESS, 4,
1586                     opx->segment, opx->wrt);
1587                 offset += 4;
1588             }
1589             break;
1590
1591         case4(0254):
1592             data = opx->offset;
1593             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1594                 (int32_t)data != (int64_t)data) {
1595                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1596                         "signed dword immediate exceeds bounds");
1597             }
1598             out(offset, segment, &data, OUT_ADDRESS, 4,
1599                 opx->segment, opx->wrt);
1600             offset += 4;
1601             break;
1602
1603         case4(0260):
1604         case 0270:
1605             codes += 2;
1606             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1607                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1608                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1609                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1610                     ((~ins->vexreg & 15)<< 3) | (ins->vex_wlp & 07);
1611                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1612                 offset += 3;
1613             } else {
1614                 bytes[0] = 0xc5;
1615                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1616                     ((~ins->vexreg & 15) << 3) | (ins->vex_wlp & 07);
1617                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1618                 offset += 2;
1619             }
1620             break;
1621
1622         case4(0274):
1623         {
1624             uint64_t uv, um;
1625             int s;
1626
1627             if (ins->rex & REX_W)
1628                 s = 64;
1629             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1630                 s = 16;
1631             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1632                 s = 32;
1633             else
1634                 s = bits;
1635
1636             um = (uint64_t)2 << (s-1);
1637             uv = opx->offset;
1638
1639             if (uv > 127 && uv < (uint64_t)-128 &&
1640                 (uv < um-128 || uv > um-1)) {
1641                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1642                         "signed byte value exceeds bounds");
1643             }
1644             if (opx->segment != NO_SEG) {
1645                 data = uv;
1646                 out(offset, segment, &data, OUT_ADDRESS, 1,
1647                     opx->segment, opx->wrt);
1648             } else {
1649                 bytes[0] = uv;
1650                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1651                     NO_SEG);
1652             }
1653             offset += 1;
1654             break;
1655         }
1656
1657         case4(0300):
1658             break;
1659
1660         case 0310:
1661             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1662                 *bytes = 0x67;
1663                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1664                 offset += 1;
1665             } else
1666                 offset += 0;
1667             break;
1668
1669         case 0311:
1670             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1671                 *bytes = 0x67;
1672                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1673                 offset += 1;
1674             } else
1675                 offset += 0;
1676             break;
1677
1678         case 0312:
1679             break;
1680
1681         case 0313:
1682             ins->rex = 0;
1683             break;
1684
1685         case4(0314):
1686             break;
1687
1688         case 0320:
1689         case 0321:
1690             break;
1691
1692         case 0322:
1693         case 0323:
1694             break;
1695
1696         case 0324:
1697             ins->rex |= REX_W;
1698             break;
1699
1700         case 0325:
1701             break;
1702
1703         case 0330:
1704             *bytes = *codes++ ^ condval[ins->condition];
1705             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1706             offset += 1;
1707             break;
1708
1709         case 0331:
1710             break;
1711
1712         case 0332:
1713         case 0333:
1714             *bytes = c - 0332 + 0xF2;
1715             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1716             offset += 1;
1717             break;
1718
1719         case 0334:
1720             if (ins->rex & REX_R) {
1721                 *bytes = 0xF0;
1722                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1723                 offset += 1;
1724             }
1725             ins->rex &= ~(REX_L|REX_R);
1726             break;
1727
1728         case 0335:
1729             break;
1730
1731         case 0336:
1732         case 0337:
1733             break;
1734
1735         case 0340:
1736             if (ins->oprs[0].segment != NO_SEG)
1737                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1738             else {
1739                 int64_t size = ins->oprs[0].offset;
1740                 if (size > 0)
1741                     out(offset, segment, NULL,
1742                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1743                 offset += size;
1744             }
1745             break;
1746
1747         case 0341:
1748             break;
1749
1750         case 0344:
1751         case 0345:
1752             bytes[0] = c & 1;
1753             switch (ins->oprs[0].basereg) {
1754             case R_CS:
1755                 bytes[0] += 0x0E;
1756                 break;
1757             case R_DS:
1758                 bytes[0] += 0x1E;
1759                 break;
1760             case R_ES:
1761                 bytes[0] += 0x06;
1762                 break;
1763             case R_SS:
1764                 bytes[0] += 0x16;
1765                 break;
1766             default:
1767                 errfunc(ERR_PANIC,
1768                         "bizarre 8086 segment register received");
1769             }
1770             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1771             offset++;
1772             break;
1773
1774         case 0346:
1775         case 0347:
1776             bytes[0] = c & 1;
1777             switch (ins->oprs[0].basereg) {
1778             case R_FS:
1779                 bytes[0] += 0xA0;
1780                 break;
1781             case R_GS:
1782                 bytes[0] += 0xA8;
1783                 break;
1784             default:
1785                 errfunc(ERR_PANIC,
1786                         "bizarre 386 segment register received");
1787             }
1788             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1789             offset++;
1790             break;
1791
1792         case 0360:
1793             break;
1794
1795         case 0361:
1796             bytes[0] = 0x66;
1797             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1798             offset += 1;
1799             break;
1800
1801         case 0362:
1802         case 0363:
1803             bytes[0] = c - 0362 + 0xf2;
1804             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1805             offset += 1;
1806             break;
1807
1808         case 0364:
1809         case 0365:
1810             break;
1811
1812         case 0366:
1813         case 0367:
1814             *bytes = c - 0366 + 0x66;
1815             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1816             offset += 1;
1817             break;
1818
1819         case 0370:
1820         case 0371:
1821         case 0372:
1822             break;
1823
1824         case 0373:
1825             *bytes = bits == 16 ? 3 : 5;
1826             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1827             offset += 1;
1828             break;
1829
1830         case 0374:
1831             eat = EA_XMMVSIB;
1832             break;
1833
1834         case 0375:
1835             eat = EA_YMMVSIB;
1836             break;
1837
1838         case4(0100):
1839         case4(0110):
1840         case4(0120):
1841         case4(0130):
1842         case4(0200):
1843         case4(0204):
1844         case4(0210):
1845         case4(0214):
1846         case4(0220):
1847         case4(0224):
1848         case4(0230):
1849         case4(0234):
1850             {
1851                 ea ea_data;
1852                 int rfield;
1853                 opflags_t rflags;
1854                 uint8_t *p;
1855                 int32_t s;
1856                 struct operand *opy = &ins->oprs[op2];
1857
1858                 if (c <= 0177) {
1859                     /* pick rfield from operand b (opx) */
1860                     rflags = regflag(opx);
1861                     rfield = nasm_regvals[opx->basereg];
1862                 } else {
1863                     /* rfield is constant */
1864                     rflags = 0;
1865                     rfield = c & 7;
1866                 }
1867
1868                 if (process_ea(opy, &ea_data, bits, ins->addr_size,
1869                                rfield, rflags) != eat)
1870                     errfunc(ERR_NONFATAL, "invalid effective address");
1871
1872                 p = bytes;
1873                 *p++ = ea_data.modrm;
1874                 if (ea_data.sib_present)
1875                     *p++ = ea_data.sib;
1876
1877                 s = p - bytes;
1878                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1879
1880                 /*
1881                  * Make sure the address gets the right offset in case
1882                  * the line breaks in the .lst file (BR 1197827)
1883                  */
1884                 offset += s;
1885                 s = 0;
1886
1887                 switch (ea_data.bytes) {
1888                 case 0:
1889                     break;
1890                 case 1:
1891                 case 2:
1892                 case 4:
1893                 case 8:
1894                     data = opy->offset;
1895                     s += ea_data.bytes;
1896                     if (ea_data.rip) {
1897                         if (opy->segment == segment) {
1898                             data -= insn_end;
1899                             if (overflow_signed(data, ea_data.bytes))
1900                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1901                             out(offset, segment, &data, OUT_ADDRESS,
1902                                 ea_data.bytes, NO_SEG, NO_SEG);
1903                         } else {
1904                             /* overflow check in output/linker? */
1905                             out(offset, segment, &data,        OUT_REL4ADR,
1906                                 insn_end - offset, opy->segment, opy->wrt);
1907                         }
1908                     } else {
1909                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1910                             signed_bits(opy->offset, ins->addr_size) !=
1911                             signed_bits(opy->offset, ea_data.bytes * 8))
1912                             warn_overflow(ERR_PASS2, ea_data.bytes);
1913
1914                         out(offset, segment, &data, OUT_ADDRESS,
1915                             ea_data.bytes, opy->segment, opy->wrt);
1916                     }
1917                     break;
1918                 default:
1919                     /* Impossible! */
1920                     errfunc(ERR_PANIC,
1921                             "Invalid amount of bytes (%d) for offset?!",
1922                             ea_data.bytes);
1923                     break;
1924                 }
1925                 offset += s;
1926             }
1927             break;
1928
1929         default:
1930             errfunc(ERR_PANIC, "internal instruction table corrupt"
1931                     ": instruction code \\%o (0x%02X) given", c, c);
1932             break;
1933         }
1934     }
1935 }
1936
1937 static opflags_t regflag(const operand * o)
1938 {
1939     if (!is_register(o->basereg))
1940         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1941     return nasm_reg_flags[o->basereg];
1942 }
1943
1944 static int32_t regval(const operand * o)
1945 {
1946     if (!is_register(o->basereg))
1947         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1948     return nasm_regvals[o->basereg];
1949 }
1950
1951 static int op_rexflags(const operand * o, int mask)
1952 {
1953     opflags_t flags;
1954     int val;
1955
1956     if (!is_register(o->basereg))
1957         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1958
1959     flags = nasm_reg_flags[o->basereg];
1960     val = nasm_regvals[o->basereg];
1961
1962     return rexflags(val, flags, mask);
1963 }
1964
1965 static int rexflags(int val, opflags_t flags, int mask)
1966 {
1967     int rex = 0;
1968
1969     if (val >= 8)
1970         rex |= REX_B|REX_X|REX_R;
1971     if (flags & BITS64)
1972         rex |= REX_W;
1973     if (!(REG_HIGH & ~flags))                   /* AH, CH, DH, BH */
1974         rex |= REX_H;
1975     else if (!(REG8 & ~flags) && val >= 4)      /* SPL, BPL, SIL, DIL */
1976         rex |= REX_P;
1977
1978     return rex & mask;
1979 }
1980
1981 static enum match_result find_match(const struct itemplate **tempp,
1982                                     insn *instruction,
1983                                     int32_t segment, int64_t offset, int bits)
1984 {
1985     const struct itemplate *temp;
1986     enum match_result m, merr;
1987     opflags_t xsizeflags[MAX_OPERANDS];
1988     bool opsizemissing = false;
1989     int i;
1990
1991     for (i = 0; i < instruction->operands; i++)
1992         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1993
1994     merr = MERR_INVALOP;
1995
1996     for (temp = nasm_instructions[instruction->opcode];
1997          temp->opcode != I_none; temp++) {
1998         m = matches(temp, instruction, bits);
1999         if (m == MOK_JUMP) {
2000             if (jmp_match(segment, offset, bits, instruction, temp->code))
2001                 m = MOK_GOOD;
2002             else
2003                 m = MERR_INVALOP;
2004         } else if (m == MERR_OPSIZEMISSING &&
2005                    (temp->flags & IF_SMASK) != IF_SX) {
2006             /*
2007              * Missing operand size and a candidate for fuzzy matching...
2008              */
2009             for (i = 0; i < temp->operands; i++) {
2010                 if ((temp->opd[i] & SAME_AS) == 0)
2011                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
2012             }
2013             opsizemissing = true;
2014         }
2015         if (m > merr)
2016             merr = m;
2017         if (merr == MOK_GOOD)
2018             goto done;
2019     }
2020
2021     /* No match, but see if we can get a fuzzy operand size match... */
2022     if (!opsizemissing)
2023         goto done;
2024
2025     for (i = 0; i < instruction->operands; i++) {
2026         /*
2027          * We ignore extrinsic operand sizes on registers, so we should
2028          * never try to fuzzy-match on them.  This also resolves the case
2029          * when we have e.g. "xmmrm128" in two different positions.
2030          */
2031         if (is_class(REGISTER, instruction->oprs[i].type))
2032             continue;
2033
2034         /* This tests if xsizeflags[i] has more than one bit set */
2035         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2036             goto done;                /* No luck */
2037
2038         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2039     }
2040
2041     /* Try matching again... */
2042     for (temp = nasm_instructions[instruction->opcode];
2043          temp->opcode != I_none; temp++) {
2044         m = matches(temp, instruction, bits);
2045         if (m == MOK_JUMP) {
2046             if (jmp_match(segment, offset, bits, instruction, temp->code))
2047                 m = MOK_GOOD;
2048             else
2049                 m = MERR_INVALOP;
2050         }
2051         if (m > merr)
2052             merr = m;
2053         if (merr == MOK_GOOD)
2054             goto done;
2055     }
2056
2057 done:
2058     *tempp = temp;
2059     return merr;
2060 }
2061
2062 static enum match_result matches(const struct itemplate *itemp,
2063                                  insn *instruction, int bits)
2064 {
2065     int i, size[MAX_OPERANDS], asize, oprs;
2066     bool opsizemissing = false;
2067
2068     /*
2069      * Check the opcode
2070      */
2071     if (itemp->opcode != instruction->opcode)
2072         return MERR_INVALOP;
2073
2074     /*
2075      * Count the operands
2076      */
2077     if (itemp->operands != instruction->operands)
2078         return MERR_INVALOP;
2079
2080     /*
2081      * Is it legal?
2082      */
2083     if (!(optimizing > 0) && (itemp->flags & IF_OPT))
2084         return MERR_INVALOP;
2085
2086     /*
2087      * Check that no spurious colons or TOs are present
2088      */
2089     for (i = 0; i < itemp->operands; i++)
2090         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2091             return MERR_INVALOP;
2092
2093     /*
2094      * Process size flags
2095      */
2096     switch (itemp->flags & IF_SMASK) {
2097     case IF_SB:
2098         asize = BITS8;
2099         break;
2100     case IF_SW:
2101         asize = BITS16;
2102         break;
2103     case IF_SD:
2104         asize = BITS32;
2105         break;
2106     case IF_SQ:
2107         asize = BITS64;
2108         break;
2109     case IF_SO:
2110         asize = BITS128;
2111         break;
2112     case IF_SY:
2113         asize = BITS256;
2114         break;
2115     case IF_SZ:
2116         switch (bits) {
2117         case 16:
2118             asize = BITS16;
2119             break;
2120         case 32:
2121             asize = BITS32;
2122             break;
2123         case 64:
2124             asize = BITS64;
2125             break;
2126         default:
2127             asize = 0;
2128             break;
2129         }
2130         break;
2131     default:
2132         asize = 0;
2133         break;
2134     }
2135
2136     if (itemp->flags & IF_ARMASK) {
2137         /* S- flags only apply to a specific operand */
2138         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2139         memset(size, 0, sizeof size);
2140         size[i] = asize;
2141     } else {
2142         /* S- flags apply to all operands */
2143         for (i = 0; i < MAX_OPERANDS; i++)
2144             size[i] = asize;
2145     }
2146
2147     /*
2148      * Check that the operand flags all match up,
2149      * it's a bit tricky so lets be verbose:
2150      *
2151      * 1) Find out the size of operand. If instruction
2152      *    doesn't have one specified -- we're trying to
2153      *    guess it either from template (IF_S* flag) or
2154      *    from code bits.
2155      *
2156      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2157      *    (ie the same operand as was specified somewhere in template, and
2158      *    this referred operand index is being achieved via ~SAME_AS)
2159      *    we are to be sure that both registers (in template and instruction)
2160      *    do exactly match.
2161      *
2162      * 3) If template operand do not match the instruction OR
2163      *    template has an operand size specified AND this size differ
2164      *    from which instruction has (perhaps we got it from code bits)
2165      *    we are:
2166      *      a)  Check that only size of instruction and operand is differ
2167      *          other characteristics do match
2168      *      b)  Perhaps it's a register specified in instruction so
2169      *          for such a case we just mark that operand as "size
2170      *          missing" and this will turn on fuzzy operand size
2171      *          logic facility (handled by a caller)
2172      */
2173     for (i = 0; i < itemp->operands; i++) {
2174         opflags_t type = instruction->oprs[i].type;
2175         if (!(type & SIZE_MASK))
2176             type |= size[i];
2177
2178         if (itemp->opd[i] & SAME_AS) {
2179             int j = itemp->opd[i] & ~SAME_AS;
2180             if (type != instruction->oprs[j].type ||
2181                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2182                 return MERR_INVALOP;
2183         } else if (itemp->opd[i] & ~type ||
2184             ((itemp->opd[i] & SIZE_MASK) &&
2185              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2186             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2187                 return MERR_INVALOP;
2188             } else if (!is_class(REGISTER, type)) {
2189                 /*
2190                  * Note: we don't honor extrinsic operand sizes for registers,
2191                  * so "missing operand size" for a register should be
2192                  * considered a wildcard match rather than an error.
2193                  */
2194                 opsizemissing = true;
2195             }
2196         }
2197     }
2198
2199     if (opsizemissing)
2200         return MERR_OPSIZEMISSING;
2201
2202     /*
2203      * Check operand sizes
2204      */
2205     if (itemp->flags & (IF_SM | IF_SM2)) {
2206         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2207         for (i = 0; i < oprs; i++) {
2208             asize = itemp->opd[i] & SIZE_MASK;
2209             if (asize) {
2210                 for (i = 0; i < oprs; i++)
2211                     size[i] = asize;
2212                 break;
2213             }
2214         }
2215     } else {
2216         oprs = itemp->operands;
2217     }
2218
2219     for (i = 0; i < itemp->operands; i++) {
2220         if (!(itemp->opd[i] & SIZE_MASK) &&
2221             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2222             return MERR_OPSIZEMISMATCH;
2223     }
2224
2225     /*
2226      * Check template is okay at the set cpu level
2227      */
2228     if (((itemp->flags & IF_PLEVEL) > cpu))
2229         return MERR_BADCPU;
2230
2231     /*
2232      * Verify the appropriate long mode flag.
2233      */
2234     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2235         return MERR_BADMODE;
2236
2237     /*
2238      * Check if special handling needed for Jumps
2239      */
2240     if ((itemp->code[0] & 0374) == 0370)
2241         return MOK_JUMP;
2242
2243     return MOK_GOOD;
2244 }
2245
2246 static enum ea_type process_ea(operand *input, ea *output, int bits,
2247                                int addrbits, int rfield, opflags_t rflags)
2248 {
2249     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2250
2251     output->type    = EA_SCALAR;
2252     output->rip     = false;
2253
2254     /* REX flags for the rfield operand */
2255     output->rex     |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
2256
2257     if (is_class(REGISTER, input->type)) {
2258         /*
2259          * It's a direct register.
2260          */
2261         opflags_t f;
2262
2263         if (!is_register(input->basereg))
2264             goto err;
2265
2266         f = regflag(input);
2267
2268         if (!is_class(REG_EA, f))
2269             goto err;
2270
2271         output->rex         |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
2272         output->sib_present = false;    /* no SIB necessary */
2273         output->bytes       = 0;        /* no offset necessary either */
2274         output->modrm       = GEN_MODRM(3, rfield, nasm_regvals[input->basereg]);
2275     } else {
2276         /*
2277          * It's a memory reference.
2278          */
2279         if (input->basereg == -1 &&
2280             (input->indexreg == -1 || input->scale == 0)) {
2281             /*
2282              * It's a pure offset.
2283              */
2284             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2285                 input->segment == NO_SEG) {
2286                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2287                 input->type &= ~IP_REL;
2288                 input->type |= MEMORY;
2289             }
2290
2291             if (input->eaflags & EAF_BYTEOFFS ||
2292                 (input->eaflags & EAF_WORDOFFS &&
2293                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2294                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2295             }
2296
2297             if (bits == 64 && (~input->type & IP_REL)) {
2298                 output->sib_present = true;
2299                 output->sib         = GEN_SIB(0, 4, 5);
2300                 output->bytes       = 4;
2301                 output->modrm       = GEN_MODRM(0, rfield, 4);
2302                 output->rip         = false;
2303             } else {
2304                 output->sib_present = false;
2305                 output->bytes       = (addrbits != 16 ? 4 : 2);
2306                 output->modrm       = GEN_MODRM(0, rfield, (addrbits != 16 ? 5 : 6));
2307                 output->rip         = bits == 64;
2308             }
2309         } else {
2310             /*
2311              * It's an indirection.
2312              */
2313             int i = input->indexreg, b = input->basereg, s = input->scale;
2314             int32_t seg = input->segment;
2315             int hb = input->hintbase, ht = input->hinttype;
2316             int t, it, bt;              /* register numbers */
2317             opflags_t x, ix, bx;        /* register flags */
2318
2319             if (s == 0)
2320                 i = -1;         /* make this easy, at least */
2321
2322             if (is_register(i)) {
2323                 it = nasm_regvals[i];
2324                 ix = nasm_reg_flags[i];
2325             } else {
2326                 it = -1;
2327                 ix = 0;
2328             }
2329
2330             if (is_register(b)) {
2331                 bt = nasm_regvals[b];
2332                 bx = nasm_reg_flags[b];
2333             } else {
2334                 bt = -1;
2335                 bx = 0;
2336             }
2337
2338             /* if either one are a vector register... */
2339             if ((ix|bx) & (XMMREG|YMMREG) & ~REG_EA) {
2340                 int32_t sok = BITS32 | BITS64;
2341                 int32_t o = input->offset;
2342                 int mod, scale, index, base;
2343
2344                 /*
2345                  * For a vector SIB, one has to be a vector and the other,
2346                  * if present, a GPR.  The vector must be the index operand.
2347                  */
2348                 if (it == -1 || (bx & (XMMREG|YMMREG) & ~REG_EA)) {
2349                     if (s == 0)
2350                         s = 1;
2351                     else if (s != 1)
2352                         goto err;
2353
2354                     t = bt, bt = it, it = t;
2355                     x = bx, bx = ix, ix = x;
2356                 }
2357
2358                 if (bt != -1) {
2359                     if (REG_GPR & ~bx)
2360                         goto err;
2361                     if (!(REG64 & ~bx) || !(REG32 & ~bx))
2362                         sok &= bx;
2363                     else
2364                         goto err;
2365                 }
2366
2367                 /*
2368                  * While we're here, ensure the user didn't specify
2369                  * WORD or QWORD
2370                  */
2371                 if (input->disp_size == 16 || input->disp_size == 64)
2372                     goto err;
2373
2374                 if (addrbits == 16 ||
2375                     (addrbits == 32 && !(sok & BITS32)) ||
2376                     (addrbits == 64 && !(sok & BITS64)))
2377                     goto err;
2378
2379                 output->type = (ix & YMMREG & ~REG_EA)
2380                     ? EA_YMMVSIB : EA_XMMVSIB;
2381
2382                 output->rex |= rexflags(it, ix, REX_X);
2383                 output->rex |= rexflags(bt, bx, REX_B);
2384
2385                 index = it & 7; /* it is known to be != -1 */
2386
2387                 switch (s) {
2388                 case 1:
2389                     scale = 0;
2390                     break;
2391                 case 2:
2392                     scale = 1;
2393                     break;
2394                 case 4:
2395                     scale = 2;
2396                     break;
2397                 case 8:
2398                     scale = 3;
2399                     break;
2400                 default:   /* then what the smeg is it? */
2401                     goto err;    /* panic */
2402                 }
2403
2404                 if (bt == -1) {
2405                     base = 5;
2406                     mod = 0;
2407                 } else {
2408                     base = (bt & 7);
2409                     if (base != REG_NUM_EBP && o == 0 &&
2410                         seg == NO_SEG && !forw_ref &&
2411                         !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2412                         mod = 0;
2413                     else if (input->eaflags & EAF_BYTEOFFS ||
2414                              (o >= -128 && o <= 127 &&
2415                               seg == NO_SEG && !forw_ref &&
2416                               !(input->eaflags & EAF_WORDOFFS)))
2417                         mod = 1;
2418                     else
2419                         mod = 2;
2420                 }
2421
2422                 output->sib_present = true;
2423                 output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2424                 output->modrm       = GEN_MODRM(mod, rfield, 4);
2425                 output->sib         = GEN_SIB(scale, index, base);
2426             } else if ((ix|bx) & (BITS32|BITS64)) {
2427                 /*
2428                  * it must be a 32/64-bit memory reference. Firstly we have
2429                  * to check that all registers involved are type E/Rxx.
2430                  */
2431                 int32_t sok = BITS32 | BITS64;
2432                 int32_t o = input->offset;
2433
2434                 if (it != -1) {
2435                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2436                         sok &= ix;
2437                     else
2438                         goto err;
2439                 }
2440
2441                 if (bt != -1) {
2442                     if (REG_GPR & ~bx)
2443                         goto err; /* Invalid register */
2444                     if (~sok & bx & SIZE_MASK)
2445                         goto err; /* Invalid size */
2446                     sok &= bx;
2447                 }
2448
2449                 /*
2450                  * While we're here, ensure the user didn't specify
2451                  * WORD or QWORD
2452                  */
2453                 if (input->disp_size == 16 || input->disp_size == 64)
2454                     goto err;
2455
2456                 if (addrbits == 16 ||
2457                     (addrbits == 32 && !(sok & BITS32)) ||
2458                     (addrbits == 64 && !(sok & BITS64)))
2459                     goto err;
2460
2461                 /* now reorganize base/index */
2462                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2463                     ((hb == b && ht == EAH_NOTBASE) ||
2464                      (hb == i && ht == EAH_MAKEBASE))) {
2465                     /* swap if hints say so */
2466                     t = bt, bt = it, it = t;
2467                     x = bx, bx = ix, ix = x;
2468                 }
2469                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2470                     bt = -1, bx = 0, s++;
2471                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2472                     /* make single reg base, unless hint */
2473                     bt = it, bx = ix, it = -1, ix = 0;
2474                 }
2475                 if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
2476                       s == 3 || s == 5 || s == 9) && bt == -1)
2477                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2478                 if (it == -1 && (bt & 7) != REG_NUM_ESP &&
2479                     (input->eaflags & EAF_TIMESTWO))
2480                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2481                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2482                 if (s == 1 && it == REG_NUM_ESP) {
2483                     /* swap ESP into base if scale is 1 */
2484                     t = it, it = bt, bt = t;
2485                     x = ix, ix = bx, bx = x;
2486                 }
2487                 if (it == REG_NUM_ESP ||
2488                     (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2489                     goto err;        /* wrong, for various reasons */
2490
2491                 output->rex |= rexflags(it, ix, REX_X);
2492                 output->rex |= rexflags(bt, bx, REX_B);
2493
2494                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2495                     /* no SIB needed */
2496                     int mod, rm;
2497
2498                     if (bt == -1) {
2499                         rm = 5;
2500                         mod = 0;
2501                     } else {
2502                         rm = (bt & 7);
2503                         if (rm != REG_NUM_EBP && o == 0 &&
2504                             seg == NO_SEG && !forw_ref &&
2505                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2506                             mod = 0;
2507                         else if (input->eaflags & EAF_BYTEOFFS ||
2508                                  (o >= -128 && o <= 127 &&
2509                                   seg == NO_SEG && !forw_ref &&
2510                                   !(input->eaflags & EAF_WORDOFFS)))
2511                             mod = 1;
2512                         else
2513                             mod = 2;
2514                     }
2515
2516                     output->sib_present = false;
2517                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2518                     output->modrm       = GEN_MODRM(mod, rfield, rm);
2519                 } else {
2520                     /* we need a SIB */
2521                     int mod, scale, index, base;
2522
2523                     if (it == -1)
2524                         index = 4, s = 1;
2525                     else
2526                         index = (it & 7);
2527
2528                     switch (s) {
2529                     case 1:
2530                         scale = 0;
2531                         break;
2532                     case 2:
2533                         scale = 1;
2534                         break;
2535                     case 4:
2536                         scale = 2;
2537                         break;
2538                     case 8:
2539                         scale = 3;
2540                         break;
2541                     default:   /* then what the smeg is it? */
2542                         goto err;    /* panic */
2543                     }
2544
2545                     if (bt == -1) {
2546                         base = 5;
2547                         mod = 0;
2548                     } else {
2549                         base = (bt & 7);
2550                         if (base != REG_NUM_EBP && o == 0 &&
2551                             seg == NO_SEG && !forw_ref &&
2552                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2553                             mod = 0;
2554                         else if (input->eaflags & EAF_BYTEOFFS ||
2555                                  (o >= -128 && o <= 127 &&
2556                                   seg == NO_SEG && !forw_ref &&
2557                                   !(input->eaflags & EAF_WORDOFFS)))
2558                             mod = 1;
2559                         else
2560                             mod = 2;
2561                     }
2562
2563                     output->sib_present = true;
2564                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2565                     output->modrm       = GEN_MODRM(mod, rfield, 4);
2566                     output->sib         = GEN_SIB(scale, index, base);
2567                 }
2568             } else {            /* it's 16-bit */
2569                 int mod, rm;
2570                 int16_t o = input->offset;
2571
2572                 /* check for 64-bit long mode */
2573                 if (addrbits == 64)
2574                     goto err;
2575
2576                 /* check all registers are BX, BP, SI or DI */
2577                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
2578                     (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
2579                     goto err;
2580
2581                 /* ensure the user didn't specify DWORD/QWORD */
2582                 if (input->disp_size == 32 || input->disp_size == 64)
2583                     goto err;
2584
2585                 if (s != 1 && i != -1)
2586                     goto err;        /* no can do, in 16-bit EA */
2587                 if (b == -1 && i != -1) {
2588                     int tmp = b;
2589                     b = i;
2590                     i = tmp;
2591                 }               /* swap */
2592                 if ((b == R_SI || b == R_DI) && i != -1) {
2593                     int tmp = b;
2594                     b = i;
2595                     i = tmp;
2596                 }
2597                 /* have BX/BP as base, SI/DI index */
2598                 if (b == i)
2599                     goto err;        /* shouldn't ever happen, in theory */
2600                 if (i != -1 && b != -1 &&
2601                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2602                     goto err;        /* invalid combinations */
2603                 if (b == -1)            /* pure offset: handled above */
2604                     goto err;        /* so if it gets to here, panic! */
2605
2606                 rm = -1;
2607                 if (i != -1)
2608                     switch (i * 256 + b) {
2609                     case R_SI * 256 + R_BX:
2610                         rm = 0;
2611                         break;
2612                     case R_DI * 256 + R_BX:
2613                         rm = 1;
2614                         break;
2615                     case R_SI * 256 + R_BP:
2616                         rm = 2;
2617                         break;
2618                     case R_DI * 256 + R_BP:
2619                         rm = 3;
2620                         break;
2621                 } else
2622                     switch (b) {
2623                     case R_SI:
2624                         rm = 4;
2625                         break;
2626                     case R_DI:
2627                         rm = 5;
2628                         break;
2629                     case R_BP:
2630                         rm = 6;
2631                         break;
2632                     case R_BX:
2633                         rm = 7;
2634                         break;
2635                     }
2636                 if (rm == -1)           /* can't happen, in theory */
2637                     goto err;        /* so panic if it does */
2638
2639                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2640                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2641                     mod = 0;
2642                 else if (input->eaflags & EAF_BYTEOFFS ||
2643                          (o >= -128 && o <= 127 && seg == NO_SEG &&
2644                           !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
2645                     mod = 1;
2646                 else
2647                     mod = 2;
2648
2649                 output->sib_present = false;    /* no SIB - it's 16-bit */
2650                 output->bytes       = mod;      /* bytes of offset needed */
2651                 output->modrm       = GEN_MODRM(mod, rfield, rm);
2652             }
2653         }
2654     }
2655
2656     output->size = 1 + output->sib_present + output->bytes;
2657     return output->type;
2658
2659 err:
2660     return output->type = EA_INVALID;
2661 }
2662
2663 static void add_asp(insn *ins, int addrbits)
2664 {
2665     int j, valid;
2666     int defdisp;
2667
2668     valid = (addrbits == 64) ? 64|32 : 32|16;
2669
2670     switch (ins->prefixes[PPS_ASIZE]) {
2671     case P_A16:
2672         valid &= 16;
2673         break;
2674     case P_A32:
2675         valid &= 32;
2676         break;
2677     case P_A64:
2678         valid &= 64;
2679         break;
2680     case P_ASP:
2681         valid &= (addrbits == 32) ? 16 : 32;
2682         break;
2683     default:
2684         break;
2685     }
2686
2687     for (j = 0; j < ins->operands; j++) {
2688         if (is_class(MEMORY, ins->oprs[j].type)) {
2689             opflags_t i, b;
2690
2691             /* Verify as Register */
2692             if (!is_register(ins->oprs[j].indexreg))
2693                 i = 0;
2694             else
2695                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2696
2697             /* Verify as Register */
2698             if (!is_register(ins->oprs[j].basereg))
2699                 b = 0;
2700             else
2701                 b = nasm_reg_flags[ins->oprs[j].basereg];
2702
2703             if (ins->oprs[j].scale == 0)
2704                 i = 0;
2705
2706             if (!i && !b) {
2707                 int ds = ins->oprs[j].disp_size;
2708                 if ((addrbits != 64 && ds > 8) ||
2709                     (addrbits == 64 && ds == 16))
2710                     valid &= ds;
2711             } else {
2712                 if (!(REG16 & ~b))
2713                     valid &= 16;
2714                 if (!(REG32 & ~b))
2715                     valid &= 32;
2716                 if (!(REG64 & ~b))
2717                     valid &= 64;
2718
2719                 if (!(REG16 & ~i))
2720                     valid &= 16;
2721                 if (!(REG32 & ~i))
2722                     valid &= 32;
2723                 if (!(REG64 & ~i))
2724                     valid &= 64;
2725             }
2726         }
2727     }
2728
2729     if (valid & addrbits) {
2730         ins->addr_size = addrbits;
2731     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2732         /* Add an address size prefix */
2733         ins->prefixes[PPS_ASIZE] = (addrbits == 32) ? P_A16 : P_A32;;
2734         ins->addr_size = (addrbits == 32) ? 16 : 32;
2735     } else {
2736         /* Impossible... */
2737         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2738         ins->addr_size = addrbits; /* Error recovery */
2739     }
2740
2741     defdisp = ins->addr_size == 16 ? 16 : 32;
2742
2743     for (j = 0; j < ins->operands; j++) {
2744         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2745             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
2746             /*
2747              * mem_offs sizes must match the address size; if not,
2748              * strip the MEM_OFFS bit and match only EA instructions
2749              */
2750             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2751         }
2752     }
2753 }