assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2012 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \172\ab       - the register number from operand a in bits 7..4, with
  70  *                 the 4-bit immediate from operand b in bits 3..0.
  71  * \173\xab      - the register number from operand a in bits 7..4, with
  72  *                 the value b in bits 3..0.
  73  * \174..\177    - the register number from operand 0..3 in bits 7..4, and
  74  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  75  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  76  *                 field equal to digit b.
  77  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  78  *                 is not equal to the truncated and sign-extended 32-bit
  79  *                 operand; used for 32-bit immediates in 64-bit mode.
  80  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  81  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  82  *                 V field taken from operand 0..3.
  83  * \264          - skip this instruction pattern if HLE prefixes present
  84  * \265          - instruction takes XRELEASE (F3) with or without lock
  85  * \266          - instruction takes XACQUIRE/XRELEASE with or without lock
  86  * \267          - instruction takes XACQUIRE/XRELEASE with lock only
  87  * \270          - this instruction uses VEX/XOP rather than REX, with the
  88  *                 V field set to 1111b.
  89  *
  90  * VEX/XOP prefixes are followed by the sequence:
  91  * \tmm\wlp        where mm is the M field; and wlp is:
  92  *                 00 wwl lpp
  93  *                 [l0]  ll = 0 for L = 0 (.128, .lz)
  94  *                 [l1]  ll = 1 for L = 1 (.256)
  95  *                 [lig] ll = 2 for L don't care (always assembled as 0)
  96  *
  97  *                 [w0]  ww = 0 for W = 0
  98  *                 [w1 ] ww = 1 for W = 1
  99  *                 [wig] ww = 2 for W don't care (always assembled as 0)
 100  *                 [ww]  ww = 3 for W used as REX.W
 101  *
 102  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 103  *
 104  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 105  *                 which is to be extended to the operand size.
 106  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 107  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 108  * \312          - (disassembler only) invalid with non-default address size.
 109  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 110  * \314          - (disassembler only) invalid with REX.B
 111  * \315          - (disassembler only) invalid with REX.X
 112  * \316          - (disassembler only) invalid with REX.R
 113  * \317          - (disassembler only) invalid with REX.W
 114  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 115  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 116  * \322          - indicates that this instruction is only valid when the
 117  *                 operand size is the default (instruction to disassembler,
 118  *                 generates no code in the assembler)
 119  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 120  * \324          - indicates 64-bit operand size requiring REX prefix.
 121  * \325          - instruction which always uses spl/bpl/sil/dil
 122  * \330          - a literal byte follows in the code stream, to be added
 123  *                 to the condition code value of the instruction.
 124  * \331          - instruction not valid with REP prefix.  Hint for
 125  *                 disassembler only; for SSE instructions.
 126  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 127  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 128  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 129  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
 130  * \336          - force a REP(E) prefix (0xF3) even if not specified.
 131  * \337          - force a REPNE prefix (0xF2) even if not specified.
 132  *                 \336-\337 are still listed as prefixes in the disassembler.
 133  * \340          - reserve <operand 0> bytes of uninitialized storage.
 134  *                 Operand 0 had better be a segmentless constant.
 135  * \341          - this instruction needs a WAIT "prefix"
 136  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 137  *                 (POP is never used for CS) depending on operand 0
 138  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 139  *                 on operand 0
 140  * \360          - no SSE prefix (== \364\331)
 141  * \361          - 66 SSE prefix (== \366\331)
 142  * \362          - F2 SSE prefix (== \364\332)
 143  * \363          - F3 SSE prefix (== \364\333)
 144  * \364          - operand-size prefix (0x66) not permitted
 145  * \365          - address-size prefix (0x67) not permitted
 146  * \366          - operand-size prefix (0x66) used as opcode extension
 147  * \367          - address-size prefix (0x67) used as opcode extension
 148  * \370,\371     - match only if operand 0 meets byte jump criteria.
 149  *                 370 is used for Jcc, 371 is used for JMP.
 150  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 151  *                 used for conditional jump over longer jump
 152  * \374          - this instruction takes an XMM VSIB memory EA
 153  * \375          - this instruction takes an YMM VSIB memory EA
 154  */
 155
 156 #include "compiler.h"
 157
 158 #include <stdio.h>
 159 #include <string.h>
 160 #include <inttypes.h>
 161
 162 #include "nasm.h"
 163 #include "nasmlib.h"
 164 #include "assemble.h"
 165 #include "insns.h"
 166 #include "tables.h"
 167
 168 enum match_result {
 169     /*
 170      * Matching errors.  These should be sorted so that more specific
 171      * errors come later in the sequence.
 172      */
 173     MERR_INVALOP,
 174     MERR_OPSIZEMISSING,
 175     MERR_OPSIZEMISMATCH,
 176     MERR_BADCPU,
 177     MERR_BADMODE,
 178     /*
 179      * Matching success; the conditional ones first
 180      */
 181     MOK_JUMP,   /* Matching OK but needs jmp_match() */
 182     MOK_GOOD    /* Matching unconditionally OK */
 183 };
 184
 185 typedef struct {
 186     enum ea_type type;            /* what kind of EA is this? */
 187     int sib_present;              /* is a SIB byte necessary? */
 188     int bytes;                    /* # of bytes of offset needed */
 189     int size;                     /* lazy - this is sib+bytes+1 */
 190     uint8_t modrm, sib, rex, rip; /* the bytes themselves */
 191 } ea;
 192
 193 #define GEN_SIB(scale, index, base)                 \
 194         (((scale) << 6) | ((index) << 3) | ((base)))
 195
 196 #define GEN_MODRM(mod, reg, rm)                     \
 197         (((mod) << 6) | (((reg) & 7) << 3) | ((rm) & 7))
 198
 199 static uint32_t cpu;            /* cpu level received from nasm.c */
 200 static efunc errfunc;
 201 static struct ofmt *outfmt;
 202 static ListGen *list;
 203
 204 static int64_t calcsize(int32_t, int64_t, int, insn *,
 205                         const struct itemplate *);
 206 static void gencode(int32_t segment, int64_t offset, int bits,
 207                     insn * ins, const struct itemplate *temp,
 208                     int64_t insn_end);
 209 static enum match_result find_match(const struct itemplate **tempp,
 210                                     insn *instruction,
 211                                     int32_t segment, int64_t offset, int bits);
 212 static enum match_result matches(const struct itemplate *, insn *, int bits);
 213 static opflags_t regflag(const operand *);
 214 static int32_t regval(const operand *);
 215 static int rexflags(int, opflags_t, int);
 216 static int op_rexflags(const operand *, int);
 217 static void add_asp(insn *, int);
 218
 219 static enum ea_type process_ea(operand *, ea *, int, int, int, opflags_t);
 220
 221 static int has_prefix(insn * ins, enum prefix_pos pos, int prefix)
 222 {
 223     return ins->prefixes[pos] == prefix;
 224 }
 225
 226 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 227 {
 228     if (ins->prefixes[pos])
 229         errfunc(ERR_NONFATAL, "invalid %s prefix",
 230                 prefix_name(ins->prefixes[pos]));
 231 }
 232
 233 static const char *size_name(int size)
 234 {
 235     switch (size) {
 236     case 1:
 237         return "byte";
 238     case 2:
 239         return "word";
 240     case 4:
 241         return "dword";
 242     case 8:
 243         return "qword";
 244     case 10:
 245         return "tword";
 246     case 16:
 247         return "oword";
 248     case 32:
 249         return "yword";
 250     default:
 251         return "???";
 252     }
 253 }
 254
 255 static void warn_overflow(int pass, int size)
 256 {
 257     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 258             "%s data exceeds bounds", size_name(size));
 259 }
 260
 261 static void warn_overflow_const(int64_t data, int size)
 262 {
 263     if (overflow_general(data, size))
 264         warn_overflow(ERR_PASS1, size);
 265 }
 266
 267 static void warn_overflow_opd(const struct operand *o, int size)
 268 {
 269     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 270         if (overflow_general(o->offset, size))
 271             warn_overflow(ERR_PASS2, size);
 272     }
 273 }
 274
 275 /*
 276  * This routine wrappers the real output format's output routine,
 277  * in order to pass a copy of the data off to the listing file
 278  * generator at the same time.
 279  */
 280 static void out(int64_t offset, int32_t segto, const void *data,
 281                 enum out_type type, uint64_t size,
 282                 int32_t segment, int32_t wrt)
 283 {
 284     static int32_t lineno = 0;     /* static!!! */
 285     static char *lnfname = NULL;
 286     uint8_t p[8];
 287
 288     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 289         /*
 290          * This is a non-relocated address, and we're going to
 291          * convert it into RAWDATA format.
 292          */
 293         uint8_t *q = p;
 294
 295         if (size > 8) {
 296             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 297             return;
 298         }
 299
 300         WRITEADDR(q, *(int64_t *)data, size);
 301         data = p;
 302         type = OUT_RAWDATA;
 303     }
 304
 305     list->output(offset, data, type, size);
 306
 307     /*
 308      * this call to src_get determines when we call the
 309      * debug-format-specific "linenum" function
 310      * it updates lineno and lnfname to the current values
 311      * returning 0 if "same as last time", -2 if lnfname
 312      * changed, and the amount by which lineno changed,
 313      * if it did. thus, these variables must be static
 314      */
 315
 316     if (src_get(&lineno, &lnfname))
 317         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 318
 319     outfmt->output(segto, data, type, size, segment, wrt);
 320 }
 321
 322 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 323                       insn * ins, const struct itemplate *temp)
 324 {
 325     int64_t isize;
 326     const uint8_t *code = temp->code;
 327     uint8_t c = code[0];
 328
 329     if (((c & ~1) != 0370) || (ins->oprs[0].type & STRICT))
 330         return false;
 331     if (!optimizing)
 332         return false;
 333     if (optimizing < 0 && c == 0371)
 334         return false;
 335
 336     isize = calcsize(segment, offset, bits, ins, temp);
 337
 338     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 339         /* Be optimistic in pass 1 */
 340         return true;
 341
 342     if (ins->oprs[0].segment != segment)
 343         return false;
 344
 345     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 346     return (isize >= -128 && isize <= 127); /* is it byte size? */
 347 }
 348
 349 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 350                  insn * instruction, struct ofmt *output, efunc error,
 351                  ListGen * listgen)
 352 {
 353     const struct itemplate *temp;
 354     int j;
 355     enum match_result m;
 356     int64_t insn_end;
 357     int32_t itimes;
 358     int64_t start = offset;
 359     int64_t wsize;              /* size for DB etc. */
 360
 361     errfunc = error;            /* to pass to other functions */
 362     cpu = cp;
 363     outfmt = output;            /* likewise */
 364     list = listgen;             /* and again */
 365
 366     wsize = idata_bytes(instruction->opcode);
 367     if (wsize == -1)
 368         return 0;
 369
 370     if (wsize) {
 371         extop *e;
 372         int32_t t = instruction->times;
 373         if (t < 0)
 374             errfunc(ERR_PANIC,
 375                     "instruction->times < 0 (%ld) in assemble()", t);
 376
 377         while (t--) {           /* repeat TIMES times */
 378             list_for_each(e, instruction->eops) {
 379                 if (e->type == EOT_DB_NUMBER) {
 380                     if (wsize > 8) {
 381                         errfunc(ERR_NONFATAL,
 382                                 "integer supplied to a DT, DO or DY"
 383                                 " instruction");
 384                     } else {
 385                         out(offset, segment, &e->offset,
 386                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 387                         offset += wsize;
 388                     }
 389                 } else if (e->type == EOT_DB_STRING ||
 390                            e->type == EOT_DB_STRING_FREE) {
 391                     int align;
 392
 393                     out(offset, segment, e->stringval,
 394                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 395                     align = e->stringlen % wsize;
 396
 397                     if (align) {
 398                         align = wsize - align;
 399                         out(offset, segment, zero_buffer,
 400                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 401                     }
 402                     offset += e->stringlen + align;
 403                 }
 404             }
 405             if (t > 0 && t == instruction->times - 1) {
 406                 /*
 407                  * Dummy call to list->output to give the offset to the
 408                  * listing module.
 409                  */
 410                 list->output(offset, NULL, OUT_RAWDATA, 0);
 411                 list->uplevel(LIST_TIMES);
 412             }
 413         }
 414         if (instruction->times > 1)
 415             list->downlevel(LIST_TIMES);
 416         return offset - start;
 417     }
 418
 419     if (instruction->opcode == I_INCBIN) {
 420         const char *fname = instruction->eops->stringval;
 421         FILE *fp;
 422
 423         fp = fopen(fname, "rb");
 424         if (!fp) {
 425             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 426                   fname);
 427         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 428             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 429                   fname);
 430         } else {
 431             static char buf[4096];
 432             size_t t = instruction->times;
 433             size_t base = 0;
 434             size_t len;
 435
 436             len = ftell(fp);
 437             if (instruction->eops->next) {
 438                 base = instruction->eops->next->offset;
 439                 len -= base;
 440                 if (instruction->eops->next->next &&
 441                     len > (size_t)instruction->eops->next->next->offset)
 442                     len = (size_t)instruction->eops->next->next->offset;
 443             }
 444             /*
 445              * Dummy call to list->output to give the offset to the
 446              * listing module.
 447              */
 448             list->output(offset, NULL, OUT_RAWDATA, 0);
 449             list->uplevel(LIST_INCBIN);
 450             while (t--) {
 451                 size_t l;
 452
 453                 fseek(fp, base, SEEK_SET);
 454                 l = len;
 455                 while (l > 0) {
 456                     int32_t m;
 457                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 458                     if (!m) {
 459                         /*
 460                          * This shouldn't happen unless the file
 461                          * actually changes while we are reading
 462                          * it.
 463                          */
 464                         error(ERR_NONFATAL,
 465                               "`incbin': unexpected EOF while"
 466                               " reading file `%s'", fname);
 467                         t = 0;  /* Try to exit cleanly */
 468                         break;
 469                     }
 470                     out(offset, segment, buf, OUT_RAWDATA, m,
 471                         NO_SEG, NO_SEG);
 472                     l -= m;
 473                 }
 474             }
 475             list->downlevel(LIST_INCBIN);
 476             if (instruction->times > 1) {
 477                 /*
 478                  * Dummy call to list->output to give the offset to the
 479                  * listing module.
 480                  */
 481                 list->output(offset, NULL, OUT_RAWDATA, 0);
 482                 list->uplevel(LIST_TIMES);
 483                 list->downlevel(LIST_TIMES);
 484             }
 485             fclose(fp);
 486             return instruction->times * len;
 487         }
 488         return 0;               /* if we're here, there's an error */
 489     }
 490
 491     /* Check to see if we need an address-size prefix */
 492     add_asp(instruction, bits);
 493
 494     m = find_match(&temp, instruction, segment, offset, bits);
 495
 496     if (m == MOK_GOOD) {
 497         /* Matches! */
 498         int64_t insn_size = calcsize(segment, offset, bits, instruction, temp);
 499         itimes = instruction->times;
 500         if (insn_size < 0)  /* shouldn't be, on pass two */
 501             error(ERR_PANIC, "errors made it through from pass one");
 502         else
 503             while (itimes--) {
 504                 for (j = 0; j < MAXPREFIX; j++) {
 505                     uint8_t c = 0;
 506                     switch (instruction->prefixes[j]) {
 507                     case P_WAIT:
 508                         c = 0x9B;
 509                         break;
 510                     case P_LOCK:
 511                         c = 0xF0;
 512                         break;
 513                     case P_REPNE:
 514                     case P_REPNZ:
 515                     case P_XACQUIRE:
 516                         c = 0xF2;
 517                         break;
 518                     case P_REPE:
 519                     case P_REPZ:
 520                     case P_REP:
 521                     case P_XRELEASE:
 522                         c = 0xF3;
 523                         break;
 524                     case R_CS:
 525                         if (bits == 64) {
 526                             error(ERR_WARNING | ERR_PASS2,
 527                                   "cs segment base generated, but will be ignored in 64-bit mode");
 528                         }
 529                         c = 0x2E;
 530                         break;
 531                     case R_DS:
 532                         if (bits == 64) {
 533                             error(ERR_WARNING | ERR_PASS2,
 534                                   "ds segment base generated, but will be ignored in 64-bit mode");
 535                         }
 536                         c = 0x3E;
 537                         break;
 538                     case R_ES:
 539                         if (bits == 64) {
 540                             error(ERR_WARNING | ERR_PASS2,
 541                                   "es segment base generated, but will be ignored in 64-bit mode");
 542                         }
 543                         c = 0x26;
 544                         break;
 545                     case R_FS:
 546                         c = 0x64;
 547                         break;
 548                     case R_GS:
 549                         c = 0x65;
 550                         break;
 551                     case R_SS:
 552                         if (bits == 64) {
 553                             error(ERR_WARNING | ERR_PASS2,
 554                                   "ss segment base generated, but will be ignored in 64-bit mode");
 555                         }
 556                         c = 0x36;
 557                         break;
 558                     case R_SEGR6:
 559                     case R_SEGR7:
 560                         error(ERR_NONFATAL,
 561                               "segr6 and segr7 cannot be used as prefixes");
 562                         break;
 563                     case P_A16:
 564                         if (bits == 64) {
 565                             error(ERR_NONFATAL,
 566                                   "16-bit addressing is not supported "
 567                                   "in 64-bit mode");
 568                         } else if (bits != 16)
 569                             c = 0x67;
 570                         break;
 571                     case P_A32:
 572                         if (bits != 32)
 573                             c = 0x67;
 574                         break;
 575                     case P_A64:
 576                         if (bits != 64) {
 577                             error(ERR_NONFATAL,
 578                                   "64-bit addressing is only supported "
 579                                   "in 64-bit mode");
 580                         }
 581                         break;
 582                     case P_ASP:
 583                         c = 0x67;
 584                         break;
 585                     case P_O16:
 586                         if (bits != 16)
 587                             c = 0x66;
 588                         break;
 589                     case P_O32:
 590                         if (bits == 16)
 591                             c = 0x66;
 592                         break;
 593                     case P_O64:
 594                         /* REX.W */
 595                         break;
 596                     case P_OSP:
 597                         c = 0x66;
 598                         break;
 599                     case P_none:
 600                         break;
 601                     default:
 602                         error(ERR_PANIC, "invalid instruction prefix");
 603                     }
 604                     if (c != 0) {
 605                         out(offset, segment, &c, OUT_RAWDATA, 1,
 606                             NO_SEG, NO_SEG);
 607                         offset++;
 608                     }
 609                 }
 610                 insn_end = offset + insn_size;
 611                 gencode(segment, offset, bits, instruction,
 612                         temp, insn_end);
 613                 offset += insn_size;
 614                 if (itimes > 0 && itimes == instruction->times - 1) {
 615                     /*
 616                      * Dummy call to list->output to give the offset to the
 617                      * listing module.
 618                      */
 619                     list->output(offset, NULL, OUT_RAWDATA, 0);
 620                     list->uplevel(LIST_TIMES);
 621                 }
 622             }
 623         if (instruction->times > 1)
 624             list->downlevel(LIST_TIMES);
 625         return offset - start;
 626     } else {
 627         /* No match */
 628         switch (m) {
 629         case MERR_OPSIZEMISSING:
 630             error(ERR_NONFATAL, "operation size not specified");
 631             break;
 632         case MERR_OPSIZEMISMATCH:
 633             error(ERR_NONFATAL, "mismatch in operand sizes");
 634             break;
 635         case MERR_BADCPU:
 636             error(ERR_NONFATAL, "no instruction for this cpu level");
 637             break;
 638         case MERR_BADMODE:
 639             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 640                   bits);
 641             break;
 642         default:
 643             error(ERR_NONFATAL,
 644                   "invalid combination of opcode and operands");
 645             break;
 646         }
 647     }
 648     return 0;
 649 }
 650
 651 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 652                   insn * instruction, efunc error)
 653 {
 654     const struct itemplate *temp;
 655     enum match_result m;
 656
 657     errfunc = error;            /* to pass to other functions */
 658     cpu = cp;
 659
 660     if (instruction->opcode == I_none)
 661         return 0;
 662
 663     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 664         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 665         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 666         instruction->opcode == I_DY) {
 667         extop *e;
 668         int32_t isize, osize, wsize;
 669
 670         isize = 0;
 671         wsize = idata_bytes(instruction->opcode);
 672
 673         list_for_each(e, instruction->eops) {
 674             int32_t align;
 675
 676             osize = 0;
 677             if (e->type == EOT_DB_NUMBER) {
 678                 osize = 1;
 679                 warn_overflow_const(e->offset, wsize);
 680             } else if (e->type == EOT_DB_STRING ||
 681                        e->type == EOT_DB_STRING_FREE)
 682                 osize = e->stringlen;
 683
 684             align = (-osize) % wsize;
 685             if (align < 0)
 686                 align += wsize;
 687             isize += osize + align;
 688         }
 689         return isize * instruction->times;
 690     }
 691
 692     if (instruction->opcode == I_INCBIN) {
 693         const char *fname = instruction->eops->stringval;
 694         FILE *fp;
 695         int64_t val = 0;
 696         size_t len;
 697
 698         fp = fopen(fname, "rb");
 699         if (!fp)
 700             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 701                   fname);
 702         else if (fseek(fp, 0L, SEEK_END) < 0)
 703             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 704                   fname);
 705         else {
 706             len = ftell(fp);
 707             if (instruction->eops->next) {
 708                 len -= instruction->eops->next->offset;
 709                 if (instruction->eops->next->next &&
 710                     len > (size_t)instruction->eops->next->next->offset) {
 711                     len = (size_t)instruction->eops->next->next->offset;
 712                 }
 713             }
 714             val = instruction->times * len;
 715         }
 716         if (fp)
 717             fclose(fp);
 718         return val;
 719     }
 720
 721     /* Check to see if we need an address-size prefix */
 722     add_asp(instruction, bits);
 723
 724     m = find_match(&temp, instruction, segment, offset, bits);
 725     if (m == MOK_GOOD) {
 726         /* we've matched an instruction. */
 727         int64_t isize;
 728         int j;
 729
 730         isize = calcsize(segment, offset, bits, instruction, temp);
 731         if (isize < 0)
 732             return -1;
 733         for (j = 0; j < MAXPREFIX; j++) {
 734             switch (instruction->prefixes[j]) {
 735             case P_A16:
 736                 if (bits != 16)
 737                     isize++;
 738                 break;
 739             case P_A32:
 740                 if (bits != 32)
 741                     isize++;
 742                 break;
 743             case P_O16:
 744                 if (bits != 16)
 745                     isize++;
 746                 break;
 747             case P_O32:
 748                 if (bits == 16)
 749                     isize++;
 750                 break;
 751             case P_A64:
 752             case P_O64:
 753             case P_none:
 754                 break;
 755             default:
 756                 isize++;
 757                 break;
 758             }
 759         }
 760         return isize * instruction->times;
 761     } else {
 762         return -1;                  /* didn't match any instruction */
 763     }
 764 }
 765
 766 static bool possible_sbyte(operand *o)
 767 {
 768     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 769         !(o->opflags & OPFLAG_UNKNOWN) &&
 770         optimizing >= 0 && !(o->type & STRICT);
 771 }
 772
 773 /* check that opn[op]  is a signed byte of size 16 or 32 */
 774 static bool is_sbyte16(operand *o)
 775 {
 776     int16_t v;
 777
 778     if (!possible_sbyte(o))
 779         return false;
 780
 781     v = o->offset;
 782     return v >= -128 && v <= 127;
 783 }
 784
 785 static bool is_sbyte32(operand *o)
 786 {
 787     int32_t v;
 788
 789     if (!possible_sbyte(o))
 790         return false;
 791
 792     v = o->offset;
 793     return v >= -128 && v <= 127;
 794 }
 795
 796 static void bad_hle_warn(const insn * ins, uint8_t hleok)
 797 {
 798     enum prefixes rep_pfx = ins->prefixes[PPS_REP];
 799     enum whatwarn { w_none, w_lock, w_inval } ww;
 800     static const enum whatwarn warn[2][4] =
 801     {
 802         { w_inval, w_inval, w_none, w_lock }, /* XACQUIRE */
 803         { w_inval, w_none,  w_none, w_lock }, /* XRELEASE */
 804     };
 805     unsigned int n;
 806
 807     n = (unsigned int)rep_pfx - P_XACQUIRE;
 808     if (n > 1)
 809         return;                 /* Not XACQUIRE/XRELEASE */
 810
 811     ww = warn[n][hleok];
 812     if (!is_class(MEMORY, ins->oprs[0].type))
 813         ww = w_inval;           /* HLE requires operand 0 to be memory */
 814
 815     switch (ww) {
 816     case w_none:
 817         break;
 818
 819     case w_lock:
 820         if (ins->prefixes[PPS_LOCK] != P_LOCK) {
 821             errfunc(ERR_WARNING | ERR_PASS2,
 822                     "%s with this instruction requires lock",
 823                     prefix_name(rep_pfx));
 824         }
 825         break;
 826
 827     case w_inval:
 828         errfunc(ERR_WARNING | ERR_PASS2,
 829                 "%s invalid with this instruction",
 830                 prefix_name(rep_pfx));
 831         break;
 832     }
 833 }
 834
 835 /* Common construct */
 836 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 837
 /*
  * Work out how many bytes the instruction `ins' occupies when encoded
  * with template `temp', by interpreting the template's bytecode string
  * (temp->code, terminated by a zero byte).
  *
  * segment, offset - accepted but not used by this function.
  * bits            - current mode: compared against 16, 32 and 64 below.
  * ins             - the instruction; this function MODIFIES it: ins->rex
  *                   is rebuilt from scratch, ins->vexreg / vex_cm /
  *                   vex_wlp are filled in for codes 0260..0263 and 0270,
  *                   and empty PPS_OSIZE / PPS_REP / PPS_WAIT prefix slots
  *                   may be filled in.
  * temp            - the matched template (code string and flags).
  *
  * Returns the length in bytes, or -1 if the template cannot be used for
  * this instruction.  Errors and warnings are reported through errfunc().
  * The caller shown just before this function in the file adds the bytes
  * for the entries of ins->prefixes[] on top of the returned value.
  */
 838 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 839                         insn * ins, const struct itemplate *temp)
 840 {
 841     const uint8_t *codes = temp->code;
 842     int64_t length = 0;
 843     uint8_t c;
 844     int rex_mask = ~0;
 845     int op1, op2;
 846     struct operand *opx;
 847     uint8_t opex = 0;
 848     enum ea_type eat;
 849     uint8_t hleok = 0;
 850     bool lockcheck = true;
 851
 852     ins->rex = 0;               /* Ensure REX is reset */
 853     eat = EA_SCALAR;            /* Expect a scalar EA */
 854
         /* An explicit O64 prefix requests REX.W */
 855     if (ins->prefixes[PPS_OSIZE] == P_O64)
 856         ins->rex |= REX_W;
 857
 858     (void)segment;              /* Don't warn that this parameter is unused */
 859     (void)offset;               /* Don't warn that this parameter is unused */
 860
         /* Interpret one bytecode at a time until the terminating zero */
 861     while (*codes) {
 862         c = *codes++;
             /*
              * The low two bits of c select operand op1 and bits 3-4 select
              * operand op2.  opex, left behind by a preceding 05..07 code,
              * adds 4 to op1 (bit 0) and/or to op2 (bit 1).
              */
 863         op1 = (c & 3) + ((opex & 1) << 2);
 864         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 865         opx = &ins->oprs[op1];
 866         opex = 0;               /* For the next iteration */
 867
 868         switch (c) {
             /* c template bytes follow this code: skip them and count them */
 869         case 01:
 870         case 02:
 871         case 03:
 872         case 04:
 873             codes += c, length += c;
 874             break;
 875
             /* Operand-index extension applied to the next code only */
 876         case 05:
 877         case 06:
 878         case 07:
 879             opex = c;
 880             break;
 881
             /*
              * One template byte is consumed and one byte is counted; the
              * REX flags required by the operand are collected.
              */
 882         case4(010):
 883             ins->rex |=
 884                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 885             codes++, length++;
 886             break;
 887
             /* One byte each */
 888         case4(014):
 889         case4(020):
 890         case4(024):
 891             length++;
 892             break;
 893
 894         case4(030):
 895             length += 2;
 896             break;
 897
             /*
              * An explicit size flag on the operand decides (BITS16 -> 2,
              * BITS32/BITS64 -> 4); otherwise the mode does (16 -> 2,
              * anything else -> 4).
              */
 898         case4(034):
 899             if (opx->type & (BITS16 | BITS32 | BITS64))
 900                 length += (opx->type & BITS16) ? 2 : 4;
 901             else
 902                 length += (bits == 16) ? 2 : 4;
 903             break;
 904
 905         case4(040):
 906             length += 4;
 907             break;
 908
             /* addr_size >> 3: presumably a size in bits turned into bytes */
 909         case4(044):
 910             length += ins->addr_size >> 3;
 911             break;
 912
 913         case4(050):
 914             length++;
 915             break;
 916
 917         case4(054):
 918             length += 8; /* MOV reg64/imm */
 919             break;
 920
 921         case4(060):
 922             length += 2;
 923             break;
 924
             /* Same size selection as for 034..037 above */
 925         case4(064):
 926             if (opx->type & (BITS16 | BITS32 | BITS64))
 927                 length += (opx->type & BITS16) ? 2 : 4;
 928             else
 929                 length += (bits == 16) ? 2 : 4;
 930             break;
 931
 932         case4(070):
 933             length += 4;
 934             break;
 935
 936         case4(074):
 937             length += 2;
 938             break;
 939
             /* One byte if is_sbyte16() accepts the operand, otherwise two */
 940         case4(0140):
 941             length += is_sbyte16(opx) ? 1 : 2;
 942             break;
 943
             /* One template byte consumed, one byte counted */
 944         case4(0144):
 945             codes++;
 946             length++;
 947             break;
 948
             /* One byte if is_sbyte32() accepts the operand, otherwise four */
 949         case4(0150):
 950             length += is_sbyte32(opx) ? 1 : 4;
 951             break;
 952
             /* One template byte consumed, one byte counted */
 953         case4(0154):
 954             codes++;
 955             length++;
 956             break;
 957
             /* One template byte consumed, one byte counted */
 958         case 0172:
 959         case 0173:
 960             codes++;
 961             length++;
 962             break;
 963
 964         case4(0174):
 965             length++;
 966             break;
 967
             /* One byte if is_sbyte32() accepts the operand, otherwise four */
 968         case4(0250):
 969             length += is_sbyte32(opx) ? 1 : 4;
 970             break;
 971
 972         case4(0254):
 973             length += 4;
 974             break;
 975
             /*
              * Flag the instruction with REX_V and remember the operand's
              * register value plus the next two template bytes.  No length
              * is added here; that happens in the REX_V handling after the
              * loop.
              */
 976         case4(0260):
 977             ins->rex |= REX_V;
 978             ins->vexreg = regval(opx);
 979             ins->vex_cm = *codes++;
 980             ins->vex_wlp = *codes++;
 981             break;
 982
             /* Template is rejected when XACQUIRE or XRELEASE is present */
 983         case 0264:
 984             if (has_prefix(ins, PPS_REP, P_XACQUIRE) ||
 985                 has_prefix(ins, PPS_REP, P_XRELEASE))
 986                 return -1;
 987             break;
 988
             /* Keep the low two bits (1..3) for bad_hle_warn() at the end */
 989         case 0265:
 990         case 0266:
 991         case 0267:
 992             hleok = c & 3;
 993             break;
 994
             /* Like 0260..0263, but with vexreg forced to 0 */
 995         case 0270:
 996             ins->rex |= REX_V;
 997             ins->vexreg = 0;
 998             ins->vex_cm = *codes++;
 999             ins->vex_wlp = *codes++;
1000             break;
1001
1002         case4(0274):
1003             length++;
1004             break;
1005
1006         case4(0300):
1007             break;
1008
             /*
              * Rejected in 64-bit mode.  Counts one byte unless the mode is
              * already 16-bit or an explicit A16 prefix is present.
              */
1009         case 0310:
1010             if (bits == 64)
1011                 return -1;
1012             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
1013             break;
1014
             /*
              * Counts one byte unless the mode is already 32-bit or an
              * explicit A32 prefix is present.
              */
1015         case 0311:
1016             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
1017             break;
1018
1019         case 0312:
1020             break;
1021
             /* Accepted only in 64-bit mode without an A16 or A32 prefix */
1022         case 0313:
1023             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
1024                 has_prefix(ins, PPS_ASIZE, P_A32))
1025                 return -1;
1026             break;
1027
1028         case4(0314):
1029             break;
1030
             /*
              * Fill an empty operand-size prefix slot with O16; warn if the
              * slot already holds some other prefix.
              */
1031         case 0320:
1032         {
1033             enum prefixes pfx = ins->prefixes[PPS_OSIZE];
1034             if (pfx == P_O16)
1035                 break;
1036             if (pfx != P_none)
1037                 errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
1038             else
1039                 ins->prefixes[PPS_OSIZE] = P_O16;
1040             break;
1041         }
1042
             /* Same as 0320, for O32 */
1043         case 0321:
1044         {
1045             enum prefixes pfx = ins->prefixes[PPS_OSIZE];
1046             if (pfx == P_O32)
1047                 break;
1048             if (pfx != P_none)
1049                 errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
1050             else
1051                 ins->prefixes[PPS_OSIZE] = P_O32;
1052             break;
1053         }
1054
1055         case 0322:
1056             break;
1057
             /* REX_W is stripped from ins->rex once the loop has finished */
1058         case 0323:
1059             rex_mask &= ~REX_W;
1060             break;
1061
1062         case 0324:
1063             ins->rex |= REX_W;
1064             break;
1065
             /* Checked against REX_H after the loop */
1066         case 0325:
1067             ins->rex |= REX_NH;
1068             break;
1069
             /* One template byte consumed, one byte counted */
1070         case 0330:
1071             codes++, length++;
1072             break;
1073
1074         case 0331:
1075             break;
1076
1077         case 0332:
1078         case 0333:
1079             length++;
1080             break;
1081
             /* REX_L is examined in the non-64-bit REX handling after the loop */
1082         case 0334:
1083             ins->rex |= REX_L;
1084             break;
1085
1086         case 0335:
1087             break;
1088
             /* Put REP into the REP prefix slot if that slot is empty */
1089         case 0336:
1090             if (!ins->prefixes[PPS_REP])
1091                 ins->prefixes[PPS_REP] = P_REP;
1092             break;
1093
             /* Put REPNE into the REP prefix slot if that slot is empty */
1094         case 0337:
1095             if (!ins->prefixes[PPS_REP])
1096                 ins->prefixes[PPS_REP] = P_REPNE;
1097             break;
1098
             /*
              * The length is the value of operand 0, which must not be
              * segment-relative.
              */
1099         case 0340:
1100             if (ins->oprs[0].segment != NO_SEG)
1101                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1102                         " quantity of BSS space");
1103             else
1104                 length += ins->oprs[0].offset;
1105             break;
1106
             /* Put WAIT into the WAIT prefix slot if that slot is empty */
1107         case 0341:
1108             if (!ins->prefixes[PPS_WAIT])
1109                 ins->prefixes[PPS_WAIT] = P_WAIT;
1110             break;
1111
1112         case4(0344):
1113             length++;
1114             break;
1115
1116         case 0360:
1117             break;
1118
1119         case 0361:
1120         case 0362:
1121         case 0363:
1122             length++;
1123             break;
1124
1125         case 0364:
1126         case 0365:
1127             break;
1128
1129         case 0366:
1130         case 0367:
1131             length++;
1132             break;
1133
1134         case 0370:
1135         case 0371:
1136         case 0372:
1137             break;
1138
1139         case 0373:
1140             length++;
1141             break;
1142
             /*
              * Change the effective-address kind that process_ea() must
              * report for the EA codes handled below.
              */
1143         case 0374:
1144             eat = EA_XMMVSIB;
1145             break;
1146
1147         case 0375:
1148             eat = EA_YMMVSIB;
1149             break;
1150
             /*
              * Effective-address codes: operand op2 is the address.  Its
              * size and REX requirements are computed by process_ea().
              */
1151         case4(0100):
1152         case4(0110):
1153         case4(0120):
1154         case4(0130):
1155         case4(0200):
1156         case4(0204):
1157         case4(0210):
1158         case4(0214):
1159         case4(0220):
1160         case4(0224):
1161         case4(0230):
1162         case4(0234):
1163             {
1164                 ea ea_data;
1165                 int rfield;
1166                 opflags_t rflags;
1167                 struct operand *opy = &ins->oprs[op2];
1168
1169                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1170
1171                 if (c <= 0177) {
1172                     /* pick rfield from operand b (opx) */
1173                     rflags = regflag(opx);
1174                     rfield = nasm_regvals[opx->basereg];
1175                 } else {
                         /* rfield is a constant taken from the low three bits of c */
1176                     rflags = 0;
1177                     rfield = c & 7;
1178                 }
                     /* The EA kind reported must be the one currently expected */
1179                 if (process_ea(opy, &ea_data, bits,ins->addr_size,
1180                                rfield, rflags) != eat) {
1181                     errfunc(ERR_NONFATAL, "invalid effective address");
1182                     return -1;
1183                 } else {
1184                     ins->rex |= ea_data.rex;
1185                     length += ea_data.size;
1186                 }
1187             }
1188             break;
1189
1190         default:
1191             errfunc(ERR_PANIC, "internal instruction table corrupt"
1192                     ": instruction code \\%o (0x%02X) given", c, c);
1193             break;
1194         }
1195     }
1196
         /* Drop any bits that code 0323 asked to have removed */
1197     ins->rex &= rex_mask;
1198
1199     if (ins->rex & REX_NH) {
1200         if (ins->rex & REX_H) {
1201             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1202             return -1;
1203         }
1204         ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
1205     }
1206
1207     if (ins->rex & REX_V) {
             /* REX bits that are not acceptable outside 64-bit mode */
1208         int bad32 = REX_R|REX_W|REX_X|REX_B;
1209
1210         if (ins->rex & REX_H) {
1211             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1212             return -1;
1213         }
             /* Bits 4-5 of vex_wlp decide how REX_W is treated */
1214         switch (ins->vex_wlp & 060) {
1215         case 000:
1216         case 040:
1217             ins->rex &= ~REX_W;
1218             break;
1219         case 020:
1220             ins->rex |= REX_W;
1221             bad32 &= ~REX_W;
1222             break;
1223         case 060:
1224             /* Follow REX_W */
1225             break;
1226         }
1227
1228         if (bits != 64 && ((ins->rex & bad32) || ins->vexreg > 7)) {
1229             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1230             return -1;
1231         }
             /*
              * Two bytes when vex_cm is 1 and none of REX_W/REX_X/REX_B is
              * set; three bytes otherwise.
              */
1232         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
1233             length += 3;
1234         else
1235             length += 2;
1236     } else if (ins->rex & REX_REAL) {
1237         if (ins->rex & REX_H) {
1238             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1239             return -1;
1240         } else if (bits == 64) {
                 /* One byte for the REX prefix */
1241             length++;
1242         } else if ((ins->rex & REX_L) &&
1243                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1244                    cpu >= IF_X86_64) {
1245             /* LOCK-as-REX.R */
1246             assert_no_prefix(ins, PPS_LOCK);
1247             lockcheck = false;  /* Already errored, no need for warning */
1248             length++;
1249         } else {
1250             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1251             return -1;
1252         }
1253     }
1254
         /*
          * Warn about LOCK when the template lacks IF_LOCK or operand 0 is
          * not a memory operand.
          */
1255     if (has_prefix(ins, PPS_LOCK, P_LOCK) && lockcheck &&
1256         (!(temp->flags & IF_LOCK) || !is_class(MEMORY, ins->oprs[0].type))) {
1257         errfunc(ERR_WARNING | ERR_PASS2,
1258                 "instruction is not lockable");
1259     }
1260
1261     bad_hle_warn(ins, hleok);
1262
1263     return length;
1264 }
1265
/*
 * Emit the pending REX prefix byte, if one is needed.
 *
 * Uses `ins', `bits', `offset' and `segment' from the enclosing scope.
 * A byte is written only when ins->rex has no REX_V flag, has at least
 * one REX_REAL bit set, and bits == 64.  In that case the REX_REAL bits
 * of ins->rex, combined with REX_P, are written through out(), ins->rex
 * is cleared so that the prefix is not emitted a second time, and
 * `offset' is advanced by one.
 *
 * The body is wrapped in do { } while (0) so that `EMIT_REX();' is a
 * single statement and remains correct as the body of an unbraced
 * if/else.
 */
#define EMIT_REX()                                                              \
    do {                                                                        \
        if (!(ins->rex & REX_V) && (ins->rex & REX_REAL) && (bits == 64)) {     \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                             \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG);    \
            ins->rex = 0;                                                       \
            offset += 1;                                                        \
        }                                                                       \
    } while (0)
1273
1274 static void gencode(int32_t segment, int64_t offset, int bits,
1275                     insn * ins, const struct itemplate *temp,
1276                     int64_t insn_end)
1277 {
1278     static const char condval[] = {   /* conditional opcodes */
1279         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1280         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1281         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1282     };
1283     uint8_t c;
1284     uint8_t bytes[4];
1285     int64_t size;
1286     int64_t data;
1287     int op1, op2;
1288     struct operand *opx;
1289     const uint8_t *codes = temp->code;
1290     uint8_t opex = 0;
1291     enum ea_type eat = EA_SCALAR;
1292
1293     while (*codes) {
1294         c = *codes++;
1295         op1 = (c & 3) + ((opex & 1) << 2);
1296         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1297         opx = &ins->oprs[op1];
1298         opex = 0;                /* For the next iteration */
1299
1300         switch (c) {
1301         case 01:
1302         case 02:
1303         case 03:
1304         case 04:
1305             EMIT_REX();
1306             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1307             codes += c;
1308             offset += c;
1309             break;
1310
1311         case 05:
1312         case 06:
1313         case 07:
1314             opex = c;
1315             break;
1316
1317         case4(010):
1318             EMIT_REX();
1319             bytes[0] = *codes++ + (regval(opx) & 7);
1320             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1321             offset += 1;
1322             break;
1323
1324         case4(014):
1325             /*
1326              * The test for BITS8 and SBYTE here is intended to avoid
1327              * warning on optimizer actions due to SBYTE, while still
1328              * warn on explicit BYTE directives.  Also warn, obviously,
1329              * if the optimizer isn't enabled.
1330              */
1331             if (((opx->type & BITS8) ||
1332                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1333                 (opx->offset < -128 || opx->offset > 127)) {
1334                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1335                         "signed byte value exceeds bounds");
1336             }
1337             if (opx->segment != NO_SEG) {
1338                 data = opx->offset;
1339                 out(offset, segment, &data, OUT_ADDRESS, 1,
1340                     opx->segment, opx->wrt);
1341             } else {
1342                 bytes[0] = opx->offset;
1343                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1344                     NO_SEG);
1345             }
1346             offset += 1;
1347             break;
1348
1349         case4(020):
1350             if (opx->offset < -256 || opx->offset > 255) {
1351                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1352                         "byte value exceeds bounds");
1353             }
1354             if (opx->segment != NO_SEG) {
1355                 data = opx->offset;
1356                 out(offset, segment, &data, OUT_ADDRESS, 1,
1357                     opx->segment, opx->wrt);
1358             } else {
1359                 bytes[0] = opx->offset;
1360                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1361                     NO_SEG);
1362             }
1363             offset += 1;
1364             break;
1365
1366         case4(024):
1367             if (opx->offset < 0 || opx->offset > 255)
1368                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1369                         "unsigned byte value exceeds bounds");
1370             if (opx->segment != NO_SEG) {
1371                 data = opx->offset;
1372                 out(offset, segment, &data, OUT_ADDRESS, 1,
1373                     opx->segment, opx->wrt);
1374             } else {
1375                 bytes[0] = opx->offset;
1376                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1377                     NO_SEG);
1378             }
1379             offset += 1;
1380             break;
1381
1382         case4(030):
1383             warn_overflow_opd(opx, 2);
1384             data = opx->offset;
1385             out(offset, segment, &data, OUT_ADDRESS, 2,
1386                 opx->segment, opx->wrt);
1387             offset += 2;
1388             break;
1389
1390         case4(034):
1391             if (opx->type & (BITS16 | BITS32))
1392                 size = (opx->type & BITS16) ? 2 : 4;
1393             else
1394                 size = (bits == 16) ? 2 : 4;
1395             warn_overflow_opd(opx, size);
1396             data = opx->offset;
1397             out(offset, segment, &data, OUT_ADDRESS, size,
1398                 opx->segment, opx->wrt);
1399             offset += size;
1400             break;
1401
1402         case4(040):
1403             warn_overflow_opd(opx, 4);
1404             data = opx->offset;
1405             out(offset, segment, &data, OUT_ADDRESS, 4,
1406                 opx->segment, opx->wrt);
1407             offset += 4;
1408             break;
1409
1410         case4(044):
1411             data = opx->offset;
1412             size = ins->addr_size >> 3;
1413             warn_overflow_opd(opx, size);
1414             out(offset, segment, &data, OUT_ADDRESS, size,
1415                 opx->segment, opx->wrt);
1416             offset += size;
1417             break;
1418
1419         case4(050):
1420             if (opx->segment != segment) {
1421                 data = opx->offset;
1422                 out(offset, segment, &data,
1423                     OUT_REL1ADR, insn_end - offset,
1424                     opx->segment, opx->wrt);
1425             } else {
1426                 data = opx->offset - insn_end;
1427                 if (data > 127 || data < -128)
1428                     errfunc(ERR_NONFATAL, "short jump is out of range");
1429                 out(offset, segment, &data,
1430                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1431             }
1432             offset += 1;
1433             break;
1434
1435         case4(054):
1436             data = (int64_t)opx->offset;
1437             out(offset, segment, &data, OUT_ADDRESS, 8,
1438                 opx->segment, opx->wrt);
1439             offset += 8;
1440             break;
1441
1442         case4(060):
1443             if (opx->segment != segment) {
1444                 data = opx->offset;
1445                 out(offset, segment, &data,
1446                     OUT_REL2ADR, insn_end - offset,
1447                     opx->segment, opx->wrt);
1448             } else {
1449                 data = opx->offset - insn_end;
1450                 out(offset, segment, &data,
1451                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1452             }
1453             offset += 2;
1454             break;
1455
1456         case4(064):
1457             if (opx->type & (BITS16 | BITS32 | BITS64))
1458                 size = (opx->type & BITS16) ? 2 : 4;
1459             else
1460                 size = (bits == 16) ? 2 : 4;
1461             if (opx->segment != segment) {
1462                 data = opx->offset;
1463                 out(offset, segment, &data,
1464                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1465                     insn_end - offset, opx->segment, opx->wrt);
1466             } else {
1467                 data = opx->offset - insn_end;
1468                 out(offset, segment, &data,
1469                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1470             }
1471             offset += size;
1472             break;
1473
1474         case4(070):
1475             if (opx->segment != segment) {
1476                 data = opx->offset;
1477                 out(offset, segment, &data,
1478                     OUT_REL4ADR, insn_end - offset,
1479                     opx->segment, opx->wrt);
1480             } else {
1481                 data = opx->offset - insn_end;
1482                 out(offset, segment, &data,
1483                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1484             }
1485             offset += 4;
1486             break;
1487
1488         case4(074):
1489             if (opx->segment == NO_SEG)
1490                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1491                         " relocatable");
1492             data = 0;
1493             out(offset, segment, &data, OUT_ADDRESS, 2,
1494                 outfmt->segbase(1 + opx->segment),
1495                 opx->wrt);
1496             offset += 2;
1497             break;
1498
1499         case4(0140):
1500             data = opx->offset;
1501             warn_overflow_opd(opx, 2);
1502             if (is_sbyte16(opx)) {
1503                 bytes[0] = data;
1504                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1505                     NO_SEG);
1506                 offset++;
1507             } else {
1508                 out(offset, segment, &data, OUT_ADDRESS, 2,
1509                     opx->segment, opx->wrt);
1510                 offset += 2;
1511             }
1512             break;
1513
1514         case4(0144):
1515             EMIT_REX();
1516             bytes[0] = *codes++;
1517             if (is_sbyte16(opx))
1518                 bytes[0] |= 2;  /* s-bit */
1519             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1520             offset++;
1521             break;
1522
1523         case4(0150):
1524             data = opx->offset;
1525             warn_overflow_opd(opx, 4);
1526             if (is_sbyte32(opx)) {
1527                 bytes[0] = data;
1528                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1529                     NO_SEG);
1530                 offset++;
1531             } else {
1532                 out(offset, segment, &data, OUT_ADDRESS, 4,
1533                     opx->segment, opx->wrt);
1534                 offset += 4;
1535             }
1536             break;
1537
1538         case4(0154):
1539             EMIT_REX();
1540             bytes[0] = *codes++;
1541             if (is_sbyte32(opx))
1542                 bytes[0] |= 2;  /* s-bit */
1543             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1544             offset++;
1545             break;
1546
1547         case 0172:
1548             c = *codes++;
1549             opx = &ins->oprs[c >> 3];
1550             bytes[0] = nasm_regvals[opx->basereg] << 4;
1551             opx = &ins->oprs[c & 7];
1552             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1553                 errfunc(ERR_NONFATAL,
1554                         "non-absolute expression not permitted as argument %d",
1555                         c & 7);
1556             } else {
1557                 if (opx->offset & ~15) {
1558                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1559                             "four-bit argument exceeds bounds");
1560                 }
1561                 bytes[0] |= opx->offset & 15;
1562             }
1563             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1564             offset++;
1565             break;
1566
1567         case 0173:
1568             c = *codes++;
1569             opx = &ins->oprs[c >> 4];
1570             bytes[0] = nasm_regvals[opx->basereg] << 4;
1571             bytes[0] |= c & 15;
1572             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1573             offset++;
1574             break;
1575
1576         case4(0174):
1577             bytes[0] = nasm_regvals[opx->basereg] << 4;
1578             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1579             offset++;
1580             break;
1581
1582         case4(0250):
1583             data = opx->offset;
1584             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1585                 (int32_t)data != (int64_t)data) {
1586                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1587                         "signed dword immediate exceeds bounds");
1588             }
1589             if (is_sbyte32(opx)) {
1590                 bytes[0] = data;
1591                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1592                     NO_SEG);
1593                 offset++;
1594             } else {
1595                 out(offset, segment, &data, OUT_ADDRESS, 4,
1596                     opx->segment, opx->wrt);
1597                 offset += 4;
1598             }
1599             break;
1600
1601         case4(0254):
1602             data = opx->offset;
1603             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1604                 (int32_t)data != (int64_t)data) {
1605                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1606                         "signed dword immediate exceeds bounds");
1607             }
1608             out(offset, segment, &data, OUT_ADDRESS, 4,
1609                 opx->segment, opx->wrt);
1610             offset += 4;
1611             break;
1612
1613         case4(0260):
1614         case 0270:
1615             codes += 2;
1616             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1617                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1618                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1619                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1620                     ((~ins->vexreg & 15)<< 3) | (ins->vex_wlp & 07);
1621                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1622                 offset += 3;
1623             } else {
1624                 bytes[0] = 0xc5;
1625                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1626                     ((~ins->vexreg & 15) << 3) | (ins->vex_wlp & 07);
1627                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1628                 offset += 2;
1629             }
1630             break;
1631
1632         case4(0264):
1633             break;
1634
1635         case4(0274):
1636         {
1637             uint64_t uv, um;
1638             int s;
1639
1640             if (ins->rex & REX_W)
1641                 s = 64;
1642             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1643                 s = 16;
1644             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1645                 s = 32;
1646             else
1647                 s = bits;
1648
1649             um = (uint64_t)2 << (s-1);
1650             uv = opx->offset;
1651
1652             if (uv > 127 && uv < (uint64_t)-128 &&
1653                 (uv < um-128 || uv > um-1)) {
1654                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1655                         "signed byte value exceeds bounds");
1656             }
1657             if (opx->segment != NO_SEG) {
1658                 data = uv;
1659                 out(offset, segment, &data, OUT_ADDRESS, 1,
1660                     opx->segment, opx->wrt);
1661             } else {
1662                 bytes[0] = uv;
1663                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1664                     NO_SEG);
1665             }
1666             offset += 1;
1667             break;
1668         }
1669
1670         case4(0300):
1671             break;
1672
1673         case 0310:
1674             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1675                 *bytes = 0x67;
1676                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1677                 offset += 1;
1678             } else
1679                 offset += 0;
1680             break;
1681
1682         case 0311:
1683             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1684                 *bytes = 0x67;
1685                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1686                 offset += 1;
1687             } else
1688                 offset += 0;
1689             break;
1690
1691         case 0312:
1692             break;
1693
1694         case 0313:
1695             ins->rex = 0;
1696             break;
1697
1698         case4(0314):
1699             break;
1700
1701         case 0320:
1702         case 0321:
1703             break;
1704
1705         case 0322:
1706         case 0323:
1707             break;
1708
1709         case 0324:
1710             ins->rex |= REX_W;
1711             break;
1712
1713         case 0325:
1714             break;
1715
1716         case 0330:
1717             *bytes = *codes++ ^ condval[ins->condition];
1718             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1719             offset += 1;
1720             break;
1721
1722         case 0331:
1723             break;
1724
1725         case 0332:
1726         case 0333:
1727             *bytes = c - 0332 + 0xF2;
1728             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1729             offset += 1;
1730             break;
1731
1732         case 0334:
1733             if (ins->rex & REX_R) {
1734                 *bytes = 0xF0;
1735                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1736                 offset += 1;
1737             }
1738             ins->rex &= ~(REX_L|REX_R);
1739             break;
1740
1741         case 0335:
1742             break;
1743
1744         case 0336:
1745         case 0337:
1746             break;
1747
1748         case 0340:
1749             if (ins->oprs[0].segment != NO_SEG)
1750                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1751             else {
1752                 int64_t size = ins->oprs[0].offset;
1753                 if (size > 0)
1754                     out(offset, segment, NULL,
1755                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1756                 offset += size;
1757             }
1758             break;
1759
1760         case 0341:
1761             break;
1762
1763         case 0344:
1764         case 0345:
1765             bytes[0] = c & 1;
1766             switch (ins->oprs[0].basereg) {
1767             case R_CS:
1768                 bytes[0] += 0x0E;
1769                 break;
1770             case R_DS:
1771                 bytes[0] += 0x1E;
1772                 break;
1773             case R_ES:
1774                 bytes[0] += 0x06;
1775                 break;
1776             case R_SS:
1777                 bytes[0] += 0x16;
1778                 break;
1779             default:
1780                 errfunc(ERR_PANIC,
1781                         "bizarre 8086 segment register received");
1782             }
1783             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1784             offset++;
1785             break;
1786
1787         case 0346:
1788         case 0347:
1789             bytes[0] = c & 1;
1790             switch (ins->oprs[0].basereg) {
1791             case R_FS:
1792                 bytes[0] += 0xA0;
1793                 break;
1794             case R_GS:
1795                 bytes[0] += 0xA8;
1796                 break;
1797             default:
1798                 errfunc(ERR_PANIC,
1799                         "bizarre 386 segment register received");
1800             }
1801             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1802             offset++;
1803             break;
1804
1805         case 0360:
1806             break;
1807
1808         case 0361:
1809             bytes[0] = 0x66;
1810             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1811             offset += 1;
1812             break;
1813
1814         case 0362:
1815         case 0363:
1816             bytes[0] = c - 0362 + 0xf2;
1817             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1818             offset += 1;
1819             break;
1820
1821         case 0364:
1822         case 0365:
1823             break;
1824
1825         case 0366:
1826         case 0367:
1827             *bytes = c - 0366 + 0x66;
1828             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1829             offset += 1;
1830             break;
1831
1832         case 0370:
1833         case 0371:
1834             break;
1835
1836         case 0373:
1837             *bytes = bits == 16 ? 3 : 5;
1838             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1839             offset += 1;
1840             break;
1841
1842         case 0374:
1843             eat = EA_XMMVSIB;
1844             break;
1845
1846         case 0375:
1847             eat = EA_YMMVSIB;
1848             break;
1849
1850         case4(0100):
1851         case4(0110):
1852         case4(0120):
1853         case4(0130):
1854         case4(0200):
1855         case4(0204):
1856         case4(0210):
1857         case4(0214):
1858         case4(0220):
1859         case4(0224):
1860         case4(0230):
1861         case4(0234):
1862             {
1863                 ea ea_data;
1864                 int rfield;
1865                 opflags_t rflags;
1866                 uint8_t *p;
1867                 int32_t s;
1868                 struct operand *opy = &ins->oprs[op2];
1869
1870                 if (c <= 0177) {
1871                     /* pick rfield from operand b (opx) */
1872                     rflags = regflag(opx);
1873                     rfield = nasm_regvals[opx->basereg];
1874                 } else {
1875                     /* rfield is constant */
1876                     rflags = 0;
1877                     rfield = c & 7;
1878                 }
1879
1880                 if (process_ea(opy, &ea_data, bits, ins->addr_size,
1881                                rfield, rflags) != eat)
1882                     errfunc(ERR_NONFATAL, "invalid effective address");
1883
1884                 p = bytes;
1885                 *p++ = ea_data.modrm;
1886                 if (ea_data.sib_present)
1887                     *p++ = ea_data.sib;
1888
1889                 s = p - bytes;
1890                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1891
1892                 /*
1893                  * Make sure the address gets the right offset in case
1894                  * the line breaks in the .lst file (BR 1197827)
1895                  */
1896                 offset += s;
1897                 s = 0;
1898
1899                 switch (ea_data.bytes) {
1900                 case 0:
1901                     break;
1902                 case 1:
1903                 case 2:
1904                 case 4:
1905                 case 8:
1906                     data = opy->offset;
1907                     s += ea_data.bytes;
1908                     if (ea_data.rip) {
1909                         if (opy->segment == segment) {
1910                             data -= insn_end;
1911                             if (overflow_signed(data, ea_data.bytes))
1912                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1913                             out(offset, segment, &data, OUT_ADDRESS,
1914                                 ea_data.bytes, NO_SEG, NO_SEG);
1915                         } else {
1916                             /* overflow check in output/linker? */
1917                             out(offset, segment, &data,        OUT_REL4ADR,
1918                                 insn_end - offset, opy->segment, opy->wrt);
1919                         }
1920                     } else {
1921                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1922                             signed_bits(opy->offset, ins->addr_size) !=
1923                             signed_bits(opy->offset, ea_data.bytes * 8))
1924                             warn_overflow(ERR_PASS2, ea_data.bytes);
1925
1926                         out(offset, segment, &data, OUT_ADDRESS,
1927                             ea_data.bytes, opy->segment, opy->wrt);
1928                     }
1929                     break;
1930                 default:
1931                     /* Impossible! */
1932                     errfunc(ERR_PANIC,
1933                             "Invalid amount of bytes (%d) for offset?!",
1934                             ea_data.bytes);
1935                     break;
1936                 }
1937                 offset += s;
1938             }
1939             break;
1940
1941         default:
1942             errfunc(ERR_PANIC, "internal instruction table corrupt"
1943                     ": instruction code \\%o (0x%02X) given", c, c);
1944             break;
1945         }
1946     }
1947 }
1948
1949 static opflags_t regflag(const operand * o)
1950 {
1951     if (!is_register(o->basereg))
1952         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1953     return nasm_reg_flags[o->basereg];
1954 }
1955
1956 static int32_t regval(const operand * o)
1957 {
1958     if (!is_register(o->basereg))
1959         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1960     return nasm_regvals[o->basereg];
1961 }
1962
1963 static int op_rexflags(const operand * o, int mask)
1964 {
1965     opflags_t flags;
1966     int val;
1967
1968     if (!is_register(o->basereg))
1969         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1970
1971     flags = nasm_reg_flags[o->basereg];
1972     val = nasm_regvals[o->basereg];
1973
1974     return rexflags(val, flags, mask);
1975 }
1976
1977 static int rexflags(int val, opflags_t flags, int mask)
1978 {
1979     int rex = 0;
1980
1981     if (val >= 8)
1982         rex |= REX_B|REX_X|REX_R;
1983     if (flags & BITS64)
1984         rex |= REX_W;
1985     if (!(REG_HIGH & ~flags))                   /* AH, CH, DH, BH */
1986         rex |= REX_H;
1987     else if (!(REG8 & ~flags) && val >= 4)      /* SPL, BPL, SIL, DIL */
1988         rex |= REX_P;
1989
1990     return rex & mask;
1991 }
1992
1993 static enum match_result find_match(const struct itemplate **tempp,
1994                                     insn *instruction,
1995                                     int32_t segment, int64_t offset, int bits)
1996 {
1997     const struct itemplate *temp;
1998     enum match_result m, merr;
1999     opflags_t xsizeflags[MAX_OPERANDS];
2000     bool opsizemissing = false;
2001     int i;
2002
2003     for (i = 0; i < instruction->operands; i++)
2004         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
2005
2006     merr = MERR_INVALOP;
2007
2008     for (temp = nasm_instructions[instruction->opcode];
2009          temp->opcode != I_none; temp++) {
2010         m = matches(temp, instruction, bits);
2011         if (m == MOK_JUMP) {
2012             if (jmp_match(segment, offset, bits, instruction, temp))
2013                 m = MOK_GOOD;
2014             else
2015                 m = MERR_INVALOP;
2016         } else if (m == MERR_OPSIZEMISSING &&
2017                    (temp->flags & IF_SMASK) != IF_SX) {
2018             /*
2019              * Missing operand size and a candidate for fuzzy matching...
2020              */
2021             for (i = 0; i < temp->operands; i++) {
2022                 if ((temp->opd[i] & SAME_AS) == 0)
2023                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
2024             }
2025             opsizemissing = true;
2026         }
2027         if (m > merr)
2028             merr = m;
2029         if (merr == MOK_GOOD)
2030             goto done;
2031     }
2032
2033     /* No match, but see if we can get a fuzzy operand size match... */
2034     if (!opsizemissing)
2035         goto done;
2036
2037     for (i = 0; i < instruction->operands; i++) {
2038         /*
2039          * We ignore extrinsic operand sizes on registers, so we should
2040          * never try to fuzzy-match on them.  This also resolves the case
2041          * when we have e.g. "xmmrm128" in two different positions.
2042          */
2043         if (is_class(REGISTER, instruction->oprs[i].type))
2044             continue;
2045
2046         /* This tests if xsizeflags[i] has more than one bit set */
2047         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2048             goto done;                /* No luck */
2049
2050         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2051     }
2052
2053     /* Try matching again... */
2054     for (temp = nasm_instructions[instruction->opcode];
2055          temp->opcode != I_none; temp++) {
2056         m = matches(temp, instruction, bits);
2057         if (m == MOK_JUMP) {
2058             if (jmp_match(segment, offset, bits, instruction, temp))
2059                 m = MOK_GOOD;
2060             else
2061                 m = MERR_INVALOP;
2062         }
2063         if (m > merr)
2064             merr = m;
2065         if (merr == MOK_GOOD)
2066             goto done;
2067     }
2068
2069 done:
2070     *tempp = temp;
2071     return merr;
2072 }
2073
2074 static enum match_result matches(const struct itemplate *itemp,
2075                                  insn *instruction, int bits)
2076 {
2077     int i, size[MAX_OPERANDS], asize, oprs;
2078     bool opsizemissing = false;
2079
2080     /*
2081      * Check the opcode
2082      */
2083     if (itemp->opcode != instruction->opcode)
2084         return MERR_INVALOP;
2085
2086     /*
2087      * Count the operands
2088      */
2089     if (itemp->operands != instruction->operands)
2090         return MERR_INVALOP;
2091
2092     /*
2093      * Is it legal?
2094      */
2095     if (!(optimizing > 0) && (itemp->flags & IF_OPT))
2096         return MERR_INVALOP;
2097
2098     /*
2099      * Check that no spurious colons or TOs are present
2100      */
2101     for (i = 0; i < itemp->operands; i++)
2102         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2103             return MERR_INVALOP;
2104
2105     /*
2106      * Process size flags
2107      */
2108     switch (itemp->flags & IF_SMASK) {
2109     case IF_SB:
2110         asize = BITS8;
2111         break;
2112     case IF_SW:
2113         asize = BITS16;
2114         break;
2115     case IF_SD:
2116         asize = BITS32;
2117         break;
2118     case IF_SQ:
2119         asize = BITS64;
2120         break;
2121     case IF_SO:
2122         asize = BITS128;
2123         break;
2124     case IF_SY:
2125         asize = BITS256;
2126         break;
2127     case IF_SZ:
2128         switch (bits) {
2129         case 16:
2130             asize = BITS16;
2131             break;
2132         case 32:
2133             asize = BITS32;
2134             break;
2135         case 64:
2136             asize = BITS64;
2137             break;
2138         default:
2139             asize = 0;
2140             break;
2141         }
2142         break;
2143     default:
2144         asize = 0;
2145         break;
2146     }
2147
2148     if (itemp->flags & IF_ARMASK) {
2149         /* S- flags only apply to a specific operand */
2150         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2151         memset(size, 0, sizeof size);
2152         size[i] = asize;
2153     } else {
2154         /* S- flags apply to all operands */
2155         for (i = 0; i < MAX_OPERANDS; i++)
2156             size[i] = asize;
2157     }
2158
2159     /*
2160      * Check that the operand flags all match up,
2161      * it's a bit tricky so lets be verbose:
2162      *
2163      * 1) Find out the size of operand. If instruction
2164      *    doesn't have one specified -- we're trying to
2165      *    guess it either from template (IF_S* flag) or
2166      *    from code bits.
2167      *
2168      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2169      *    (ie the same operand as was specified somewhere in template, and
2170      *    this referred operand index is being achieved via ~SAME_AS)
2171      *    we are to be sure that both registers (in template and instruction)
2172      *    do exactly match.
2173      *
2174      * 3) If template operand do not match the instruction OR
2175      *    template has an operand size specified AND this size differ
2176      *    from which instruction has (perhaps we got it from code bits)
2177      *    we are:
2178      *      a)  Check that only size of instruction and operand is differ
2179      *          other characteristics do match
2180      *      b)  Perhaps it's a register specified in instruction so
2181      *          for such a case we just mark that operand as "size
2182      *          missing" and this will turn on fuzzy operand size
2183      *          logic facility (handled by a caller)
2184      */
2185     for (i = 0; i < itemp->operands; i++) {
2186         opflags_t type = instruction->oprs[i].type;
2187         if (!(type & SIZE_MASK))
2188             type |= size[i];
2189
2190         if (itemp->opd[i] & SAME_AS) {
2191             int j = itemp->opd[i] & ~SAME_AS;
2192             if (type != instruction->oprs[j].type ||
2193                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2194                 return MERR_INVALOP;
2195         } else if (itemp->opd[i] & ~type ||
2196             ((itemp->opd[i] & SIZE_MASK) &&
2197              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2198             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2199                 return MERR_INVALOP;
2200             } else if (!is_class(REGISTER, type)) {
2201                 /*
2202                  * Note: we don't honor extrinsic operand sizes for registers,
2203                  * so "missing operand size" for a register should be
2204                  * considered a wildcard match rather than an error.
2205                  */
2206                 opsizemissing = true;
2207             }
2208         }
2209     }
2210
2211     if (opsizemissing)
2212         return MERR_OPSIZEMISSING;
2213
2214     /*
2215      * Check operand sizes
2216      */
2217     if (itemp->flags & (IF_SM | IF_SM2)) {
2218         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2219         for (i = 0; i < oprs; i++) {
2220             asize = itemp->opd[i] & SIZE_MASK;
2221             if (asize) {
2222                 for (i = 0; i < oprs; i++)
2223                     size[i] = asize;
2224                 break;
2225             }
2226         }
2227     } else {
2228         oprs = itemp->operands;
2229     }
2230
2231     for (i = 0; i < itemp->operands; i++) {
2232         if (!(itemp->opd[i] & SIZE_MASK) &&
2233             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2234             return MERR_OPSIZEMISMATCH;
2235     }
2236
2237     /*
2238      * Check template is okay at the set cpu level
2239      */
2240     if (((itemp->flags & IF_PLEVEL) > cpu))
2241         return MERR_BADCPU;
2242
2243     /*
2244      * Verify the appropriate long mode flag.
2245      */
2246     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2247         return MERR_BADMODE;
2248
2249     /*
2250      * Check if special handling needed for Jumps
2251      */
2252     if ((itemp->code[0] & ~1) == 0370)
2253         return MOK_JUMP;
2254
2255     return MOK_GOOD;
2256 }
2257
2258 static enum ea_type process_ea(operand *input, ea *output, int bits,
2259                                int addrbits, int rfield, opflags_t rflags)
2260 {
2261     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2262
2263     output->type    = EA_SCALAR;
2264     output->rip     = false;
2265
2266     /* REX flags for the rfield operand */
2267     output->rex     |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
2268
2269     if (is_class(REGISTER, input->type)) {
2270         /*
2271          * It's a direct register.
2272          */
2273         opflags_t f;
2274
2275         if (!is_register(input->basereg))
2276             goto err;
2277
2278         f = regflag(input);
2279
2280         if (!is_class(REG_EA, f))
2281             goto err;
2282
2283         output->rex         |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
2284         output->sib_present = false;    /* no SIB necessary */
2285         output->bytes       = 0;        /* no offset necessary either */
2286         output->modrm       = GEN_MODRM(3, rfield, nasm_regvals[input->basereg]);
2287     } else {
2288         /*
2289          * It's a memory reference.
2290          */
2291         if (input->basereg == -1 &&
2292             (input->indexreg == -1 || input->scale == 0)) {
2293             /*
2294              * It's a pure offset.
2295              */
2296             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2297                 input->segment == NO_SEG) {
2298                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2299                 input->type &= ~IP_REL;
2300                 input->type |= MEMORY;
2301             }
2302
2303             if (input->eaflags & EAF_BYTEOFFS ||
2304                 (input->eaflags & EAF_WORDOFFS &&
2305                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2306                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2307             }
2308
2309             if (bits == 64 && (~input->type & IP_REL)) {
2310                 output->sib_present = true;
2311                 output->sib         = GEN_SIB(0, 4, 5);
2312                 output->bytes       = 4;
2313                 output->modrm       = GEN_MODRM(0, rfield, 4);
2314                 output->rip         = false;
2315             } else {
2316                 output->sib_present = false;
2317                 output->bytes       = (addrbits != 16 ? 4 : 2);
2318                 output->modrm       = GEN_MODRM(0, rfield, (addrbits != 16 ? 5 : 6));
2319                 output->rip         = bits == 64;
2320             }
2321         } else {
2322             /*
2323              * It's an indirection.
2324              */
2325             int i = input->indexreg, b = input->basereg, s = input->scale;
2326             int32_t seg = input->segment;
2327             int hb = input->hintbase, ht = input->hinttype;
2328             int t, it, bt;              /* register numbers */
2329             opflags_t x, ix, bx;        /* register flags */
2330
2331             if (s == 0)
2332                 i = -1;         /* make this easy, at least */
2333
2334             if (is_register(i)) {
2335                 it = nasm_regvals[i];
2336                 ix = nasm_reg_flags[i];
2337             } else {
2338                 it = -1;
2339                 ix = 0;
2340             }
2341
2342             if (is_register(b)) {
2343                 bt = nasm_regvals[b];
2344                 bx = nasm_reg_flags[b];
2345             } else {
2346                 bt = -1;
2347                 bx = 0;
2348             }
2349
2350             /* if either one are a vector register... */
2351             if ((ix|bx) & (XMMREG|YMMREG) & ~REG_EA) {
2352                 int32_t sok = BITS32 | BITS64;
2353                 int32_t o = input->offset;
2354                 int mod, scale, index, base;
2355
2356                 /*
2357                  * For a vector SIB, one has to be a vector and the other,
2358                  * if present, a GPR.  The vector must be the index operand.
2359                  */
2360                 if (it == -1 || (bx & (XMMREG|YMMREG) & ~REG_EA)) {
2361                     if (s == 0)
2362                         s = 1;
2363                     else if (s != 1)
2364                         goto err;
2365
2366                     t = bt, bt = it, it = t;
2367                     x = bx, bx = ix, ix = x;
2368                 }
2369
2370                 if (bt != -1) {
2371                     if (REG_GPR & ~bx)
2372                         goto err;
2373                     if (!(REG64 & ~bx) || !(REG32 & ~bx))
2374                         sok &= bx;
2375                     else
2376                         goto err;
2377                 }
2378
2379                 /*
2380                  * While we're here, ensure the user didn't specify
2381                  * WORD or QWORD
2382                  */
2383                 if (input->disp_size == 16 || input->disp_size == 64)
2384                     goto err;
2385
2386                 if (addrbits == 16 ||
2387                     (addrbits == 32 && !(sok & BITS32)) ||
2388                     (addrbits == 64 && !(sok & BITS64)))
2389                     goto err;
2390
2391                 output->type = (ix & YMMREG & ~REG_EA)
2392                     ? EA_YMMVSIB : EA_XMMVSIB;
2393
2394                 output->rex |= rexflags(it, ix, REX_X);
2395                 output->rex |= rexflags(bt, bx, REX_B);
2396
2397                 index = it & 7; /* it is known to be != -1 */
2398
2399                 switch (s) {
2400                 case 1:
2401                     scale = 0;
2402                     break;
2403                 case 2:
2404                     scale = 1;
2405                     break;
2406                 case 4:
2407                     scale = 2;
2408                     break;
2409                 case 8:
2410                     scale = 3;
2411                     break;
2412                 default:   /* then what the smeg is it? */
2413                     goto err;    /* panic */
2414                 }
2415
2416                 if (bt == -1) {
2417                     base = 5;
2418                     mod = 0;
2419                 } else {
2420                     base = (bt & 7);
2421                     if (base != REG_NUM_EBP && o == 0 &&
2422                         seg == NO_SEG && !forw_ref &&
2423                         !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2424                         mod = 0;
2425                     else if (input->eaflags & EAF_BYTEOFFS ||
2426                              (o >= -128 && o <= 127 &&
2427                               seg == NO_SEG && !forw_ref &&
2428                               !(input->eaflags & EAF_WORDOFFS)))
2429                         mod = 1;
2430                     else
2431                         mod = 2;
2432                 }
2433
2434                 output->sib_present = true;
2435                 output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2436                 output->modrm       = GEN_MODRM(mod, rfield, 4);
2437                 output->sib         = GEN_SIB(scale, index, base);
2438             } else if ((ix|bx) & (BITS32|BITS64)) {
2439                 /*
2440                  * it must be a 32/64-bit memory reference. Firstly we have
2441                  * to check that all registers involved are type E/Rxx.
2442                  */
2443                 int32_t sok = BITS32 | BITS64;
2444                 int32_t o = input->offset;
2445
2446                 if (it != -1) {
2447                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2448                         sok &= ix;
2449                     else
2450                         goto err;
2451                 }
2452
2453                 if (bt != -1) {
2454                     if (REG_GPR & ~bx)
2455                         goto err; /* Invalid register */
2456                     if (~sok & bx & SIZE_MASK)
2457                         goto err; /* Invalid size */
2458                     sok &= bx;
2459                 }
2460
2461                 /*
2462                  * While we're here, ensure the user didn't specify
2463                  * WORD or QWORD
2464                  */
2465                 if (input->disp_size == 16 || input->disp_size == 64)
2466                     goto err;
2467
2468                 if (addrbits == 16 ||
2469                     (addrbits == 32 && !(sok & BITS32)) ||
2470                     (addrbits == 64 && !(sok & BITS64)))
2471                     goto err;
2472
2473                 /* now reorganize base/index */
2474                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2475                     ((hb == b && ht == EAH_NOTBASE) ||
2476                      (hb == i && ht == EAH_MAKEBASE))) {
2477                     /* swap if hints say so */
2478                     t = bt, bt = it, it = t;
2479                     x = bx, bx = ix, ix = x;
2480                 }
2481                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2482                     bt = -1, bx = 0, s++;
2483                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2484                     /* make single reg base, unless hint */
2485                     bt = it, bx = ix, it = -1, ix = 0;
2486                 }
2487                 if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
2488                       s == 3 || s == 5 || s == 9) && bt == -1)
2489                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2490                 if (it == -1 && (bt & 7) != REG_NUM_ESP &&
2491                     (input->eaflags & EAF_TIMESTWO))
2492                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2493                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2494                 if (s == 1 && it == REG_NUM_ESP) {
2495                     /* swap ESP into base if scale is 1 */
2496                     t = it, it = bt, bt = t;
2497                     x = ix, ix = bx, bx = x;
2498                 }
2499                 if (it == REG_NUM_ESP ||
2500                     (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2501                     goto err;        /* wrong, for various reasons */
2502
2503                 output->rex |= rexflags(it, ix, REX_X);
2504                 output->rex |= rexflags(bt, bx, REX_B);
2505
2506                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2507                     /* no SIB needed */
2508                     int mod, rm;
2509
2510                     if (bt == -1) {
2511                         rm = 5;
2512                         mod = 0;
2513                     } else {
2514                         rm = (bt & 7);
2515                         if (rm != REG_NUM_EBP && o == 0 &&
2516                             seg == NO_SEG && !forw_ref &&
2517                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2518                             mod = 0;
2519                         else if (input->eaflags & EAF_BYTEOFFS ||
2520                                  (o >= -128 && o <= 127 &&
2521                                   seg == NO_SEG && !forw_ref &&
2522                                   !(input->eaflags & EAF_WORDOFFS)))
2523                             mod = 1;
2524                         else
2525                             mod = 2;
2526                     }
2527
2528                     output->sib_present = false;
2529                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2530                     output->modrm       = GEN_MODRM(mod, rfield, rm);
2531                 } else {
2532                     /* we need a SIB */
2533                     int mod, scale, index, base;
2534
2535                     if (it == -1)
2536                         index = 4, s = 1;
2537                     else
2538                         index = (it & 7);
2539
2540                     switch (s) {
2541                     case 1:
2542                         scale = 0;
2543                         break;
2544                     case 2:
2545                         scale = 1;
2546                         break;
2547                     case 4:
2548                         scale = 2;
2549                         break;
2550                     case 8:
2551                         scale = 3;
2552                         break;
2553                     default:   /* then what the smeg is it? */
2554                         goto err;    /* panic */
2555                     }
2556
2557                     if (bt == -1) {
2558                         base = 5;
2559                         mod = 0;
2560                     } else {
2561                         base = (bt & 7);
2562                         if (base != REG_NUM_EBP && o == 0 &&
2563                             seg == NO_SEG && !forw_ref &&
2564                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2565                             mod = 0;
2566                         else if (input->eaflags & EAF_BYTEOFFS ||
2567                                  (o >= -128 && o <= 127 &&
2568                                   seg == NO_SEG && !forw_ref &&
2569                                   !(input->eaflags & EAF_WORDOFFS)))
2570                             mod = 1;
2571                         else
2572                             mod = 2;
2573                     }
2574
2575                     output->sib_present = true;
2576                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2577                     output->modrm       = GEN_MODRM(mod, rfield, 4);
2578                     output->sib         = GEN_SIB(scale, index, base);
2579                 }
2580             } else {            /* it's 16-bit */
2581                 int mod, rm;
2582                 int16_t o = input->offset;
2583
2584                 /* check for 64-bit long mode */
2585                 if (addrbits == 64)
2586                     goto err;
2587
2588                 /* check all registers are BX, BP, SI or DI */
2589                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
2590                     (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
2591                     goto err;
2592
2593                 /* ensure the user didn't specify DWORD/QWORD */
2594                 if (input->disp_size == 32 || input->disp_size == 64)
2595                     goto err;
2596
2597                 if (s != 1 && i != -1)
2598                     goto err;        /* no can do, in 16-bit EA */
2599                 if (b == -1 && i != -1) {
2600                     int tmp = b;
2601                     b = i;
2602                     i = tmp;
2603                 }               /* swap */
2604                 if ((b == R_SI || b == R_DI) && i != -1) {
2605                     int tmp = b;
2606                     b = i;
2607                     i = tmp;
2608                 }
2609                 /* have BX/BP as base, SI/DI index */
2610                 if (b == i)
2611                     goto err;        /* shouldn't ever happen, in theory */
2612                 if (i != -1 && b != -1 &&
2613                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2614                     goto err;        /* invalid combinations */
2615                 if (b == -1)            /* pure offset: handled above */
2616                     goto err;        /* so if it gets to here, panic! */
2617
2618                 rm = -1;
2619                 if (i != -1)
2620                     switch (i * 256 + b) {
2621                     case R_SI * 256 + R_BX:
2622                         rm = 0;
2623                         break;
2624                     case R_DI * 256 + R_BX:
2625                         rm = 1;
2626                         break;
2627                     case R_SI * 256 + R_BP:
2628                         rm = 2;
2629                         break;
2630                     case R_DI * 256 + R_BP:
2631                         rm = 3;
2632                         break;
2633                 } else
2634                     switch (b) {
2635                     case R_SI:
2636                         rm = 4;
2637                         break;
2638                     case R_DI:
2639                         rm = 5;
2640                         break;
2641                     case R_BP:
2642                         rm = 6;
2643                         break;
2644                     case R_BX:
2645                         rm = 7;
2646                         break;
2647                     }
2648                 if (rm == -1)           /* can't happen, in theory */
2649                     goto err;        /* so panic if it does */
2650
2651                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2652                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2653                     mod = 0;
2654                 else if (input->eaflags & EAF_BYTEOFFS ||
2655                          (o >= -128 && o <= 127 && seg == NO_SEG &&
2656                           !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
2657                     mod = 1;
2658                 else
2659                     mod = 2;
2660
2661                 output->sib_present = false;    /* no SIB - it's 16-bit */
2662                 output->bytes       = mod;      /* bytes of offset needed */
2663                 output->modrm       = GEN_MODRM(mod, rfield, rm);
2664             }
2665         }
2666     }
2667
2668     output->size = 1 + output->sib_present + output->bytes;
2669     return output->type;
2670
2671 err:
2672     return output->type = EA_INVALID;
2673 }
2674
2675 static void add_asp(insn *ins, int addrbits)
2676 {
2677     int j, valid;
2678     int defdisp;
2679
2680     valid = (addrbits == 64) ? 64|32 : 32|16;
2681
2682     switch (ins->prefixes[PPS_ASIZE]) {
2683     case P_A16:
2684         valid &= 16;
2685         break;
2686     case P_A32:
2687         valid &= 32;
2688         break;
2689     case P_A64:
2690         valid &= 64;
2691         break;
2692     case P_ASP:
2693         valid &= (addrbits == 32) ? 16 : 32;
2694         break;
2695     default:
2696         break;
2697     }
2698
2699     for (j = 0; j < ins->operands; j++) {
2700         if (is_class(MEMORY, ins->oprs[j].type)) {
2701             opflags_t i, b;
2702
2703             /* Verify as Register */
2704             if (!is_register(ins->oprs[j].indexreg))
2705                 i = 0;
2706             else
2707                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2708
2709             /* Verify as Register */
2710             if (!is_register(ins->oprs[j].basereg))
2711                 b = 0;
2712             else
2713                 b = nasm_reg_flags[ins->oprs[j].basereg];
2714
2715             if (ins->oprs[j].scale == 0)
2716                 i = 0;
2717
2718             if (!i && !b) {
2719                 int ds = ins->oprs[j].disp_size;
2720                 if ((addrbits != 64 && ds > 8) ||
2721                     (addrbits == 64 && ds == 16))
2722                     valid &= ds;
2723             } else {
2724                 if (!(REG16 & ~b))
2725                     valid &= 16;
2726                 if (!(REG32 & ~b))
2727                     valid &= 32;
2728                 if (!(REG64 & ~b))
2729                     valid &= 64;
2730
2731                 if (!(REG16 & ~i))
2732                     valid &= 16;
2733                 if (!(REG32 & ~i))
2734                     valid &= 32;
2735                 if (!(REG64 & ~i))
2736                     valid &= 64;
2737             }
2738         }
2739     }
2740
2741     if (valid & addrbits) {
2742         ins->addr_size = addrbits;
2743     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2744         /* Add an address size prefix */
2745         ins->prefixes[PPS_ASIZE] = (addrbits == 32) ? P_A16 : P_A32;;
2746         ins->addr_size = (addrbits == 32) ? 16 : 32;
2747     } else {
2748         /* Impossible... */
2749         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2750         ins->addr_size = addrbits; /* Error recovery */
2751     }
2752
2753     defdisp = ins->addr_size == 16 ? 16 : 32;
2754
2755     for (j = 0; j < ins->operands; j++) {
2756         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2757             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
2758             /*
2759              * mem_offs sizes must match the address size; if not,
2760              * strip the MEM_OFFS bit and match only EA instructions
2761              */
2762             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2763         }
2764     }
2765 }