assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2011 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \172\ab       - the register number from operand a in bits 7..4, with
  70  *                 the 4-bit immediate from operand b in bits 3..0.
  71  * \173\xab      - the register number from operand a in bits 7..4, with
  72  *                 the value b in bits 3..0.
  73  * \174..\177    - the register number from operand 0..3 in bits 7..4, and
  74  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  75  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  76  *                 field equal to digit b.
  77  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  78  *                 is not equal to the truncated and sign-extended 32-bit
  79  *                 operand; used for 32-bit immediates in 64-bit mode.
  80  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  81  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  82  *                 V field taken from operand 0..3.
  83  * \270          - this instruction uses VEX/XOP rather than REX, with the
  84  *                 V field set to 1111b.
  85  *
  86  * VEX/XOP prefixes are followed by the sequence:
  87  * \tmm\wlp        where mm is the M field; and wlp is:
  88  *                 00 wwl lpp
  89  *                 [l0]  ll = 0 for L = 0 (.128, .lz)
  90  *                 [l1]  ll = 1 for L = 1 (.256)
  91  *                 [lig] ll = 2 for L don't care (always assembled as 0)
  92  *
  93  *                 [w0]  ww = 0 for W = 0
   94  *                 [w1]  ww = 1 for W = 1
  95  *                 [wig] ww = 2 for W don't care (always assembled as 0)
  96  *                 [ww]  ww = 3 for W used as REX.W
  97  *
  98  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
  99  *
 100  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 101  *                 which is to be extended to the operand size.
 102  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 103  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 104  * \312          - (disassembler only) invalid with non-default address size.
 105  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 106  * \314          - (disassembler only) invalid with REX.B
 107  * \315          - (disassembler only) invalid with REX.X
 108  * \316          - (disassembler only) invalid with REX.R
 109  * \317          - (disassembler only) invalid with REX.W
 110  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 111  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 112  * \322          - indicates that this instruction is only valid when the
 113  *                 operand size is the default (instruction to disassembler,
 114  *                 generates no code in the assembler)
 115  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 116  * \324          - indicates 64-bit operand size requiring REX prefix.
 117  * \325          - instruction which always uses spl/bpl/sil/dil
 118  * \330          - a literal byte follows in the code stream, to be added
 119  *                 to the condition code value of the instruction.
 120  * \331          - instruction not valid with REP prefix.  Hint for
 121  *                 disassembler only; for SSE instructions.
  122  * \332          - REPNE prefix (0xF2 byte) used as opcode extension.
 123  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 124  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 125  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  126  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  127  * \337          - force a REPNE prefix (0xF2) even if not specified.
 128  *                 \336-\337 are still listed as prefixes in the disassembler.
 129  * \340          - reserve <operand 0> bytes of uninitialized storage.
 130  *                 Operand 0 had better be a segmentless constant.
 131  * \341          - this instruction needs a WAIT "prefix"
 132  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 133  *                 (POP is never used for CS) depending on operand 0
 134  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 135  *                 on operand 0
 136  * \360          - no SSE prefix (== \364\331)
 137  * \361          - 66 SSE prefix (== \366\331)
 138  * \362          - F2 SSE prefix (== \364\332)
 139  * \363          - F3 SSE prefix (== \364\333)
 140  * \364          - operand-size prefix (0x66) not permitted
 141  * \365          - address-size prefix (0x67) not permitted
 142  * \366          - operand-size prefix (0x66) used as opcode extension
 143  * \367          - address-size prefix (0x67) used as opcode extension
 144  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 145  *                 370 is used for Jcc, 371 is used for JMP.
 146  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 147  *                 used for conditional jump over longer jump
 148  * \374          - this instruction takes an XMM VSIB memory EA
  149  * \375          - this instruction takes a YMM VSIB memory EA
 150  */
 151
 152 #include "compiler.h"
 153
 154 #include <stdio.h>
 155 #include <string.h>
 156 #include <inttypes.h>
 157
 158 #include "nasm.h"
 159 #include "nasmlib.h"
 160 #include "assemble.h"
 161 #include "insns.h"
 162 #include "tables.h"
 163
  164 enum match_result {           /* result type of matches() and find_match() */
  165     /*
  166      * Matching errors.  These should be sorted so that more specific
  167      * errors come later in the sequence.  assemble() turns each of
  168      * them into the diagnostic quoted next to it. */
  169     MERR_INVALOP,             /* "invalid combination of opcode and operands" */
  170     MERR_OPSIZEMISSING,       /* "operation size not specified" */
  171     MERR_OPSIZEMISMATCH,      /* "mismatch in operand sizes" */
  172     MERR_BADCPU,              /* "no instruction for this cpu level" */
  173     MERR_BADMODE,             /* "instruction not supported in %d-bit mode" */
  174     /*
  175      * Matching success; the conditional ones first.  assemble() and
  176      * insn_size() only accept MOK_GOOD as a successful match. */
  177     MOK_JUMP,   /* Matching OK but needs jmp_match() */
  178     MOK_GOOD    /* Matching unconditionally OK */
  179 };
 180
  181 typedef struct {                  /* encoded effective address; process_ea() takes an ea * (presumably to fill it in -- confirm) */
  182     enum ea_type type;            /* what kind of EA is this? */
  183     int sib_present;              /* is a SIB byte necessary? */
  184     int bytes;                    /* # of bytes of offset needed */
  185     int size;                     /* lazy - this is sib+bytes+1 (cached total; the +1 is presumably the ModRM byte) */
  186     uint8_t modrm, sib, rex, rip; /* the bytes themselves (NOTE(review): rip looks like a flag rather than an emitted byte -- confirm) */
  187 } ea;
 188
 189 #define GEN_SIB(scale, index, base)                 \
 190         (((scale) << 6) | ((index) << 3) | ((base)))
 191
 192 #define GEN_MODRM(mod, reg, rm)                     \
 193         (((mod) << 6) | (((reg) & 7) << 3) | ((rm) & 7))
 194
  195 static uint32_t cpu;            /* cpu level received from nasm.c; stored from the cp argument of assemble() and insn_size() */
  196 static efunc errfunc;           /* error callback; stored from the error argument of assemble() and insn_size() */
  197 static struct ofmt *outfmt;     /* output format driver; stored by assemble(), not touched by insn_size() */
  198 static ListGen *list;           /* listing generator; stored by assemble(), not touched by insn_size() */
  199
  200 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *); /* forward declarations of file-local helpers follow */
  201 static void gencode(int32_t segment, int64_t offset, int bits,
  202                     insn * ins, const struct itemplate *temp,
  203                     int64_t insn_end);
  204 static enum match_result find_match(const struct itemplate **tempp,
  205                                     insn *instruction,
  206                                     int32_t segment, int64_t offset, int bits);
  207 static enum match_result matches(const struct itemplate *, insn *, int bits);
  208 static opflags_t regflag(const operand *);
  209 static int32_t regval(const operand *);
  210 static int rexflags(int, opflags_t, int);
  211 static int op_rexflags(const operand *, int);
  212 static void add_asp(insn *, int); /* called by assemble() and insn_size() before template matching */
  213
  214 static enum ea_type process_ea(operand *, ea *, int, int, int, opflags_t);
 215
 216 static int has_prefix(insn * ins, enum prefix_pos pos, int prefix)
 217 {
 218     return ins->prefixes[pos] == prefix;
 219 }
 220
 221 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 222 {
 223     if (ins->prefixes[pos])
 224         errfunc(ERR_NONFATAL, "invalid %s prefix",
 225                 prefix_name(ins->prefixes[pos]));
 226 }
 227
 228 static const char *size_name(int size)
 229 {
 230     switch (size) {
 231     case 1:
 232         return "byte";
 233     case 2:
 234         return "word";
 235     case 4:
 236         return "dword";
 237     case 8:
 238         return "qword";
 239     case 10:
 240         return "tword";
 241     case 16:
 242         return "oword";
 243     case 32:
 244         return "yword";
 245     default:
 246         return "???";
 247     }
 248 }
 249
 250 static void warn_overflow(int pass, int size)
 251 {
 252     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 253             "%s data exceeds bounds", size_name(size));
 254 }
 255
 256 static void warn_overflow_const(int64_t data, int size)
 257 {
 258     if (overflow_general(data, size))
 259         warn_overflow(ERR_PASS1, size);
 260 }
 261
 262 static void warn_overflow_opd(const struct operand *o, int size)
 263 {
 264     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 265         if (overflow_general(o->offset, size))
 266             warn_overflow(ERR_PASS2, size);
 267     }
 268 }
 269
 270 /*
 271  * This routine wrappers the real output format's output routine,
 272  * in order to pass a copy of the data off to the listing file
 273  * generator at the same time.
 274  */
 275 static void out(int64_t offset, int32_t segto, const void *data,
 276                 enum out_type type, uint64_t size,
 277                 int32_t segment, int32_t wrt)
 278 {
 279     static int32_t lineno = 0;     /* static!!! */
 280     static char *lnfname = NULL;
 281     uint8_t p[8];
 282
 283     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 284         /*
 285          * This is a non-relocated address, and we're going to
 286          * convert it into RAWDATA format.
 287          */
 288         uint8_t *q = p;
 289
 290         if (size > 8) {
 291             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 292             return;
 293         }
 294
 295         WRITEADDR(q, *(int64_t *)data, size);
 296         data = p;
 297         type = OUT_RAWDATA;
 298     }
 299
 300     list->output(offset, data, type, size);
 301
 302     /*
 303      * this call to src_get determines when we call the
 304      * debug-format-specific "linenum" function
 305      * it updates lineno and lnfname to the current values
 306      * returning 0 if "same as last time", -2 if lnfname
 307      * changed, and the amount by which lineno changed,
 308      * if it did. thus, these variables must be static
 309      */
 310
 311     if (src_get(&lineno, &lnfname))
 312         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 313
 314     outfmt->output(segto, data, type, size, segment, wrt);
 315 }
 316
 317 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 318                      insn * ins, const uint8_t *code)
 319 {
 320     int64_t isize;
 321     uint8_t c = code[0];
 322
 323     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 324         return false;
 325     if (!optimizing)
 326         return false;
 327     if (optimizing < 0 && c == 0371)
 328         return false;
 329
 330     isize = calcsize(segment, offset, bits, ins, code);
 331
 332     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 333         /* Be optimistic in pass 1 */
 334         return true;
 335
 336     if (ins->oprs[0].segment != segment)
 337         return false;
 338
 339     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 340     return (isize >= -128 && isize <= 127); /* is it byte size? */
 341 }
 342
 343 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 344                  insn * instruction, struct ofmt *output, efunc error,
 345                  ListGen * listgen)
 346 {
 347     const struct itemplate *temp;
 348     int j;
 349     enum match_result m;
 350     int64_t insn_end;
 351     int32_t itimes;
 352     int64_t start = offset;
 353     int64_t wsize;              /* size for DB etc. */
 354
 355     errfunc = error;            /* to pass to other functions */
 356     cpu = cp;
 357     outfmt = output;            /* likewise */
 358     list = listgen;             /* and again */
 359
 360     wsize = idata_bytes(instruction->opcode);
 361     if (wsize == -1)
 362         return 0;
 363
 364     if (wsize) {
 365         extop *e;
 366         int32_t t = instruction->times;
 367         if (t < 0)
 368             errfunc(ERR_PANIC,
 369                     "instruction->times < 0 (%ld) in assemble()", t);
 370
 371         while (t--) {           /* repeat TIMES times */
 372             list_for_each(e, instruction->eops) {
 373                 if (e->type == EOT_DB_NUMBER) {
 374                     if (wsize > 8) {
 375                         errfunc(ERR_NONFATAL,
 376                                 "integer supplied to a DT, DO or DY"
 377                                 " instruction");
 378                     } else {
 379                         out(offset, segment, &e->offset,
 380                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 381                         offset += wsize;
 382                     }
 383                 } else if (e->type == EOT_DB_STRING ||
 384                            e->type == EOT_DB_STRING_FREE) {
 385                     int align;
 386
 387                     out(offset, segment, e->stringval,
 388                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 389                     align = e->stringlen % wsize;
 390
 391                     if (align) {
 392                         align = wsize - align;
 393                         out(offset, segment, zero_buffer,
 394                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 395                     }
 396                     offset += e->stringlen + align;
 397                 }
 398             }
 399             if (t > 0 && t == instruction->times - 1) {
 400                 /*
 401                  * Dummy call to list->output to give the offset to the
 402                  * listing module.
 403                  */
 404                 list->output(offset, NULL, OUT_RAWDATA, 0);
 405                 list->uplevel(LIST_TIMES);
 406             }
 407         }
 408         if (instruction->times > 1)
 409             list->downlevel(LIST_TIMES);
 410         return offset - start;
 411     }
 412
 413     if (instruction->opcode == I_INCBIN) {
 414         const char *fname = instruction->eops->stringval;
 415         FILE *fp;
 416
 417         fp = fopen(fname, "rb");
 418         if (!fp) {
 419             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 420                   fname);
 421         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 422             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 423                   fname);
 424         } else {
 425             static char buf[4096];
 426             size_t t = instruction->times;
 427             size_t base = 0;
 428             size_t len;
 429
 430             len = ftell(fp);
 431             if (instruction->eops->next) {
 432                 base = instruction->eops->next->offset;
 433                 len -= base;
 434                 if (instruction->eops->next->next &&
 435                     len > (size_t)instruction->eops->next->next->offset)
 436                     len = (size_t)instruction->eops->next->next->offset;
 437             }
 438             /*
 439              * Dummy call to list->output to give the offset to the
 440              * listing module.
 441              */
 442             list->output(offset, NULL, OUT_RAWDATA, 0);
 443             list->uplevel(LIST_INCBIN);
 444             while (t--) {
 445                 size_t l;
 446
 447                 fseek(fp, base, SEEK_SET);
 448                 l = len;
 449                 while (l > 0) {
 450                     int32_t m;
 451                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 452                     if (!m) {
 453                         /*
 454                          * This shouldn't happen unless the file
 455                          * actually changes while we are reading
 456                          * it.
 457                          */
 458                         error(ERR_NONFATAL,
 459                               "`incbin': unexpected EOF while"
 460                               " reading file `%s'", fname);
 461                         t = 0;  /* Try to exit cleanly */
 462                         break;
 463                     }
 464                     out(offset, segment, buf, OUT_RAWDATA, m,
 465                         NO_SEG, NO_SEG);
 466                     l -= m;
 467                 }
 468             }
 469             list->downlevel(LIST_INCBIN);
 470             if (instruction->times > 1) {
 471                 /*
 472                  * Dummy call to list->output to give the offset to the
 473                  * listing module.
 474                  */
 475                 list->output(offset, NULL, OUT_RAWDATA, 0);
 476                 list->uplevel(LIST_TIMES);
 477                 list->downlevel(LIST_TIMES);
 478             }
 479             fclose(fp);
 480             return instruction->times * len;
 481         }
 482         return 0;               /* if we're here, there's an error */
 483     }
 484
 485     /* Check to see if we need an address-size prefix */
 486     add_asp(instruction, bits);
 487
 488     m = find_match(&temp, instruction, segment, offset, bits);
 489
 490     if (m == MOK_GOOD) {
 491         /* Matches! */
 492         int64_t insn_size = calcsize(segment, offset, bits,
 493                                      instruction, temp->code);
 494         itimes = instruction->times;
 495         if (insn_size < 0)  /* shouldn't be, on pass two */
 496             error(ERR_PANIC, "errors made it through from pass one");
 497         else
 498             while (itimes--) {
 499                 for (j = 0; j < MAXPREFIX; j++) {
 500                     uint8_t c = 0;
 501                     switch (instruction->prefixes[j]) {
 502                     case P_WAIT:
 503                         c = 0x9B;
 504                         break;
 505                     case P_LOCK:
 506                         c = 0xF0;
 507                         break;
 508                     case P_REPNE:
 509                     case P_REPNZ:
 510                         c = 0xF2;
 511                         break;
 512                     case P_REPE:
 513                     case P_REPZ:
 514                     case P_REP:
 515                         c = 0xF3;
 516                         break;
 517                     case R_CS:
 518                         if (bits == 64) {
 519                             error(ERR_WARNING | ERR_PASS2,
 520                                   "cs segment base generated, but will be ignored in 64-bit mode");
 521                         }
 522                         c = 0x2E;
 523                         break;
 524                     case R_DS:
 525                         if (bits == 64) {
 526                             error(ERR_WARNING | ERR_PASS2,
 527                                   "ds segment base generated, but will be ignored in 64-bit mode");
 528                         }
 529                         c = 0x3E;
 530                         break;
 531                     case R_ES:
 532                         if (bits == 64) {
 533                             error(ERR_WARNING | ERR_PASS2,
 534                                   "es segment base generated, but will be ignored in 64-bit mode");
 535                         }
 536                         c = 0x26;
 537                         break;
 538                     case R_FS:
 539                         c = 0x64;
 540                         break;
 541                     case R_GS:
 542                         c = 0x65;
 543                         break;
 544                     case R_SS:
 545                         if (bits == 64) {
 546                             error(ERR_WARNING | ERR_PASS2,
 547                                   "ss segment base generated, but will be ignored in 64-bit mode");
 548                         }
 549                         c = 0x36;
 550                         break;
 551                     case R_SEGR6:
 552                     case R_SEGR7:
 553                         error(ERR_NONFATAL,
 554                               "segr6 and segr7 cannot be used as prefixes");
 555                         break;
 556                     case P_A16:
 557                         if (bits == 64) {
 558                             error(ERR_NONFATAL,
 559                                   "16-bit addressing is not supported "
 560                                   "in 64-bit mode");
 561                         } else if (bits != 16)
 562                             c = 0x67;
 563                         break;
 564                     case P_A32:
 565                         if (bits != 32)
 566                             c = 0x67;
 567                         break;
 568                     case P_A64:
 569                         if (bits != 64) {
 570                             error(ERR_NONFATAL,
 571                                   "64-bit addressing is only supported "
 572                                   "in 64-bit mode");
 573                         }
 574                         break;
 575                     case P_ASP:
 576                         c = 0x67;
 577                         break;
 578                     case P_O16:
 579                         if (bits != 16)
 580                             c = 0x66;
 581                         break;
 582                     case P_O32:
 583                         if (bits == 16)
 584                             c = 0x66;
 585                         break;
 586                     case P_O64:
 587                         /* REX.W */
 588                         break;
 589                     case P_OSP:
 590                         c = 0x66;
 591                         break;
 592                     case P_none:
 593                         break;
 594                     default:
 595                         error(ERR_PANIC, "invalid instruction prefix");
 596                     }
 597                     if (c != 0) {
 598                         out(offset, segment, &c, OUT_RAWDATA, 1,
 599                             NO_SEG, NO_SEG);
 600                         offset++;
 601                     }
 602                 }
 603                 insn_end = offset + insn_size;
 604                 gencode(segment, offset, bits, instruction,
 605                         temp, insn_end);
 606                 offset += insn_size;
 607                 if (itimes > 0 && itimes == instruction->times - 1) {
 608                     /*
 609                      * Dummy call to list->output to give the offset to the
 610                      * listing module.
 611                      */
 612                     list->output(offset, NULL, OUT_RAWDATA, 0);
 613                     list->uplevel(LIST_TIMES);
 614                 }
 615             }
 616         if (instruction->times > 1)
 617             list->downlevel(LIST_TIMES);
 618         return offset - start;
 619     } else {
 620         /* No match */
 621         switch (m) {
 622         case MERR_OPSIZEMISSING:
 623             error(ERR_NONFATAL, "operation size not specified");
 624             break;
 625         case MERR_OPSIZEMISMATCH:
 626             error(ERR_NONFATAL, "mismatch in operand sizes");
 627             break;
 628         case MERR_BADCPU:
 629             error(ERR_NONFATAL, "no instruction for this cpu level");
 630             break;
 631         case MERR_BADMODE:
 632             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 633                   bits);
 634             break;
 635         default:
 636             error(ERR_NONFATAL,
 637                   "invalid combination of opcode and operands");
 638             break;
 639         }
 640     }
 641     return 0;
 642 }
 643
 644 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 645                   insn * instruction, efunc error)
 646 {
 647     const struct itemplate *temp;
 648     enum match_result m;
 649
 650     errfunc = error;            /* to pass to other functions */
 651     cpu = cp;
 652
 653     if (instruction->opcode == I_none)
 654         return 0;
 655
 656     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 657         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 658         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 659         instruction->opcode == I_DY) {
 660         extop *e;
 661         int32_t isize, osize, wsize;
 662
 663         isize = 0;
 664         wsize = idata_bytes(instruction->opcode);
 665
 666         list_for_each(e, instruction->eops) {
 667             int32_t align;
 668
 669             osize = 0;
 670             if (e->type == EOT_DB_NUMBER) {
 671                 osize = 1;
 672                 warn_overflow_const(e->offset, wsize);
 673             } else if (e->type == EOT_DB_STRING ||
 674                        e->type == EOT_DB_STRING_FREE)
 675                 osize = e->stringlen;
 676
 677             align = (-osize) % wsize;
 678             if (align < 0)
 679                 align += wsize;
 680             isize += osize + align;
 681         }
 682         return isize * instruction->times;
 683     }
 684
 685     if (instruction->opcode == I_INCBIN) {
 686         const char *fname = instruction->eops->stringval;
 687         FILE *fp;
 688         int64_t val = 0;
 689         size_t len;
 690
 691         fp = fopen(fname, "rb");
 692         if (!fp)
 693             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 694                   fname);
 695         else if (fseek(fp, 0L, SEEK_END) < 0)
 696             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 697                   fname);
 698         else {
 699             len = ftell(fp);
 700             if (instruction->eops->next) {
 701                 len -= instruction->eops->next->offset;
 702                 if (instruction->eops->next->next &&
 703                     len > (size_t)instruction->eops->next->next->offset) {
 704                     len = (size_t)instruction->eops->next->next->offset;
 705                 }
 706             }
 707             val = instruction->times * len;
 708         }
 709         if (fp)
 710             fclose(fp);
 711         return val;
 712     }
 713
 714     /* Check to see if we need an address-size prefix */
 715     add_asp(instruction, bits);
 716
 717     m = find_match(&temp, instruction, segment, offset, bits);
 718     if (m == MOK_GOOD) {
 719         /* we've matched an instruction. */
 720         int64_t isize;
 721         const uint8_t *codes = temp->code;
 722         int j;
 723
 724         isize = calcsize(segment, offset, bits, instruction, codes);
 725         if (isize < 0)
 726             return -1;
 727         for (j = 0; j < MAXPREFIX; j++) {
 728             switch (instruction->prefixes[j]) {
 729             case P_A16:
 730                 if (bits != 16)
 731                     isize++;
 732                 break;
 733             case P_A32:
 734                 if (bits != 32)
 735                     isize++;
 736                 break;
 737             case P_O16:
 738                 if (bits != 16)
 739                     isize++;
 740                 break;
 741             case P_O32:
 742                 if (bits == 16)
 743                     isize++;
 744                 break;
 745             case P_A64:
 746             case P_O64:
 747             case P_none:
 748                 break;
 749             default:
 750                 isize++;
 751                 break;
 752             }
 753         }
 754         return isize * instruction->times;
 755     } else {
 756         return -1;                  /* didn't match any instruction */
 757     }
 758 }
 759
 760 static bool possible_sbyte(operand *o)
 761 {
 762     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 763         !(o->opflags & OPFLAG_UNKNOWN) &&
 764         optimizing >= 0 && !(o->type & STRICT);
 765 }
 766
 767 /* check that opn[op]  is a signed byte of size 16 or 32 */
 768 static bool is_sbyte16(operand *o)
 769 {
 770     int16_t v;
 771
 772     if (!possible_sbyte(o))
 773         return false;
 774
 775     v = o->offset;
 776     return v >= -128 && v <= 127;
 777 }
 778
 779 static bool is_sbyte32(operand *o)
 780 {
 781     int32_t v;
 782
 783     if (!possible_sbyte(o))
 784         return false;
 785
 786     v = o->offset;
 787     return v >= -128 && v <= 127;
 788 }
 789
 790 /* Common construct */
 791 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 792
 793 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 794                         insn * ins, const uint8_t *codes)
 795 {
 796     int64_t length = 0;
 797     uint8_t c;
 798     int rex_mask = ~0;
 799     int op1, op2;
 800     struct operand *opx;
 801     uint8_t opex = 0;
 802     enum ea_type eat;
 803
 804     ins->rex = 0;               /* Ensure REX is reset */
 805     eat = EA_SCALAR;            /* Expect a scalar EA */
 806
 807     if (ins->prefixes[PPS_OSIZE] == P_O64)
 808         ins->rex |= REX_W;
 809
 810     (void)segment;              /* Don't warn that this parameter is unused */
 811     (void)offset;               /* Don't warn that this parameter is unused */
 812
 813     while (*codes) {
 814         c = *codes++;
 815         op1 = (c & 3) + ((opex & 1) << 2);
 816         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 817         opx = &ins->oprs[op1];
 818         opex = 0;               /* For the next iteration */
 819
 820         switch (c) {
 821         case 01:
 822         case 02:
 823         case 03:
 824         case 04:
 825             codes += c, length += c;
 826             break;
 827
 828         case 05:
 829         case 06:
 830         case 07:
 831             opex = c;
 832             break;
 833
 834         case4(010):
 835             ins->rex |=
 836                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 837             codes++, length++;
 838             break;
 839
 840         case4(014):
 841         case4(020):
 842         case4(024):
 843             length++;
 844             break;
 845
 846         case4(030):
 847             length += 2;
 848             break;
 849
 850         case4(034):
 851             if (opx->type & (BITS16 | BITS32 | BITS64))
 852                 length += (opx->type & BITS16) ? 2 : 4;
 853             else
 854                 length += (bits == 16) ? 2 : 4;
 855             break;
 856
 857         case4(040):
 858             length += 4;
 859             break;
 860
 861         case4(044):
 862             length += ins->addr_size >> 3;
 863             break;
 864
 865         case4(050):
 866             length++;
 867             break;
 868
 869         case4(054):
 870             length += 8; /* MOV reg64/imm */
 871             break;
 872
 873         case4(060):
 874             length += 2;
 875             break;
 876
 877         case4(064):
 878             if (opx->type & (BITS16 | BITS32 | BITS64))
 879                 length += (opx->type & BITS16) ? 2 : 4;
 880             else
 881                 length += (bits == 16) ? 2 : 4;
 882             break;
 883
 884         case4(070):
 885             length += 4;
 886             break;
 887
 888         case4(074):
 889             length += 2;
 890             break;
 891
 892         case4(0140):
 893             length += is_sbyte16(opx) ? 1 : 2;
 894             break;
 895
 896         case4(0144):
 897             codes++;
 898             length++;
 899             break;
 900
 901         case4(0150):
 902             length += is_sbyte32(opx) ? 1 : 4;
 903             break;
 904
 905         case4(0154):
 906             codes++;
 907             length++;
 908             break;
 909
 910         case 0172:
 911         case 0173:
 912             codes++;
 913             length++;
 914             break;
 915
 916         case4(0174):
 917             length++;
 918             break;
 919
 920         case4(0250):
 921             length += is_sbyte32(opx) ? 1 : 4;
 922             break;
 923
 924         case4(0254):
 925             length += 4;
 926             break;
 927
 928         case4(0260):
 929             ins->rex |= REX_V;
 930             ins->vexreg = regval(opx);
 931             ins->vex_cm = *codes++;
 932             ins->vex_wlp = *codes++;
 933             break;
 934
 935         case 0270:
 936             ins->rex |= REX_V;
 937             ins->vexreg = 0;
 938             ins->vex_cm = *codes++;
 939             ins->vex_wlp = *codes++;
 940             break;
 941
 942         case4(0274):
 943             length++;
 944             break;
 945
 946         case4(0300):
 947             break;
 948
 949         case 0310:
 950             if (bits == 64)
 951                 return -1;
 952             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 953             break;
 954
 955         case 0311:
 956             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 957             break;
 958
 959         case 0312:
 960             break;
 961
 962         case 0313:
 963             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 964                 has_prefix(ins, PPS_ASIZE, P_A32))
 965                 return -1;
 966             break;
 967
 968         case4(0314):
 969             break;
 970
 971         case 0320:
 972         {
 973             enum prefixes pfx = ins->prefixes[PPS_OSIZE];
 974             if (pfx == P_O16)
 975                 break;
 976             if (pfx != P_none)
 977                 errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
 978             else
 979                 ins->prefixes[PPS_OSIZE] = P_O16;
 980             break;
 981         }
 982
 983         case 0321:
 984         {
 985             enum prefixes pfx = ins->prefixes[PPS_OSIZE];
 986             if (pfx == P_O32)
 987                 break;
 988             if (pfx != P_none)
 989                 errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
 990             else
 991                 ins->prefixes[PPS_OSIZE] = P_O32;
 992             break;
 993         }
 994
 995         case 0322:
 996             break;
 997
 998         case 0323:
 999             rex_mask &= ~REX_W;
1000             break;
1001
1002         case 0324:
1003             ins->rex |= REX_W;
1004             break;
1005
1006         case 0325:
1007             ins->rex |= REX_NH;
1008             break;
1009
1010         case 0330:
1011             codes++, length++;
1012             break;
1013
1014         case 0331:
1015             break;
1016
1017         case 0332:
1018         case 0333:
1019             length++;
1020             break;
1021
1022         case 0334:
1023             ins->rex |= REX_L;
1024             break;
1025
1026         case 0335:
1027             break;
1028
1029         case 0336:
1030             if (!ins->prefixes[PPS_REP])
1031                 ins->prefixes[PPS_REP] = P_REP;
1032             break;
1033
1034         case 0337:
1035             if (!ins->prefixes[PPS_REP])
1036                 ins->prefixes[PPS_REP] = P_REPNE;
1037             break;
1038
1039         case 0340:
1040             if (ins->oprs[0].segment != NO_SEG)
1041                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1042                         " quantity of BSS space");
1043             else
1044                 length += ins->oprs[0].offset;
1045             break;
1046
1047         case 0341:
1048             if (!ins->prefixes[PPS_WAIT])
1049                 ins->prefixes[PPS_WAIT] = P_WAIT;
1050             break;
1051
1052         case4(0344):
1053             length++;
1054             break;
1055
1056         case 0360:
1057             break;
1058
1059         case 0361:
1060         case 0362:
1061         case 0363:
1062             length++;
1063             break;
1064
1065         case 0364:
1066         case 0365:
1067             break;
1068
1069         case 0366:
1070         case 0367:
1071             length++;
1072             break;
1073
1074         case 0370:
1075         case 0371:
1076         case 0372:
1077             break;
1078
1079         case 0373:
1080             length++;
1081             break;
1082
1083         case 0374:
1084             eat = EA_XMMVSIB;
1085             break;
1086
1087         case 0375:
1088             eat = EA_YMMVSIB;
1089             break;
1090
1091         case4(0100):
1092         case4(0110):
1093         case4(0120):
1094         case4(0130):
1095         case4(0200):
1096         case4(0204):
1097         case4(0210):
1098         case4(0214):
1099         case4(0220):
1100         case4(0224):
1101         case4(0230):
1102         case4(0234):
1103             {
1104                 ea ea_data;
1105                 int rfield;
1106                 opflags_t rflags;
1107                 struct operand *opy = &ins->oprs[op2];
1108
1109                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1110
1111                 if (c <= 0177) {
1112                     /* pick rfield from operand b (opx) */
1113                     rflags = regflag(opx);
1114                     rfield = nasm_regvals[opx->basereg];
1115                 } else {
1116                     rflags = 0;
1117                     rfield = c & 7;
1118                 }
1119                 if (process_ea(opy, &ea_data, bits,ins->addr_size,
1120                                rfield, rflags) != eat) {
1121                     errfunc(ERR_NONFATAL, "invalid effective address");
1122                     return -1;
1123                 } else {
1124                     ins->rex |= ea_data.rex;
1125                     length += ea_data.size;
1126                 }
1127             }
1128             break;
1129
1130         default:
1131             errfunc(ERR_PANIC, "internal instruction table corrupt"
1132                     ": instruction code \\%o (0x%02X) given", c, c);
1133             break;
1134         }
1135     }
1136
1137     ins->rex &= rex_mask;
1138
1139     if (ins->rex & REX_NH) {
1140         if (ins->rex & REX_H) {
1141             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1142             return -1;
1143         }
1144         ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
1145     }
1146
1147     if (ins->rex & REX_V) {
1148         int bad32 = REX_R|REX_W|REX_X|REX_B;
1149
1150         if (ins->rex & REX_H) {
1151             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1152             return -1;
1153         }
1154         switch (ins->vex_wlp & 060) {
1155         case 000:
1156         case 040:
1157             ins->rex &= ~REX_W;
1158             break;
1159         case 020:
1160             ins->rex |= REX_W;
1161             bad32 &= ~REX_W;
1162             break;
1163         case 060:
1164             /* Follow REX_W */
1165             break;
1166         }
1167
1168         if (bits != 64 && ((ins->rex & bad32) || ins->vexreg > 7)) {
1169             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1170             return -1;
1171         }
1172         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
1173             length += 3;
1174         else
1175             length += 2;
1176     } else if (ins->rex & REX_REAL) {
1177         if (ins->rex & REX_H) {
1178             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1179             return -1;
1180         } else if (bits == 64) {
1181             length++;
1182         } else if ((ins->rex & REX_L) &&
1183                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1184                    cpu >= IF_X86_64) {
1185             /* LOCK-as-REX.R */
1186             assert_no_prefix(ins, PPS_LOCK);
1187             length++;
1188         } else {
1189             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1190             return -1;
1191         }
1192     }
1193
1194     return length;
1195 }
1196
/*
 * Emit the pending REX prefix byte, if there is one.
 *
 * Expands in place and therefore relies on `ins', `bits', `segment' and
 * `offset' being in scope at the point of use.  A byte is written only
 * in 64-bit mode, for a non-VEX instruction that has at least one
 * REX_REAL bit set; the byte is those bits plus REX_P.  Afterwards
 * ins->rex is cleared, so a later EMIT_REX() for the same instruction
 * emits nothing, and `offset' is advanced past the byte.
 *
 * The body is wrapped in do { } while (0) so that `EMIT_REX();' is
 * exactly one statement and stays correct inside an unbraced if/else.
 */
#define EMIT_REX()                                                              \
    do {                                                                        \
        if (!(ins->rex & REX_V) && (ins->rex & REX_REAL) && (bits == 64)) {     \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                             \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG);    \
            ins->rex = 0;                                                       \
            offset += 1;                                                        \
        }                                                                       \
    } while (0)
1204
1205 static void gencode(int32_t segment, int64_t offset, int bits,
1206                     insn * ins, const struct itemplate *temp,
1207                     int64_t insn_end)
1208 {
1209     static const char condval[] = {   /* conditional opcodes */
1210         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1211         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1212         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1213     };
1214     uint8_t c;
1215     uint8_t bytes[4];
1216     int64_t size;
1217     int64_t data;
1218     int op1, op2;
1219     struct operand *opx;
1220     const uint8_t *codes = temp->code;
1221     uint8_t opex = 0;
1222     enum ea_type eat = EA_SCALAR;
1223
1224     while (*codes) {
1225         c = *codes++;
1226         op1 = (c & 3) + ((opex & 1) << 2);
1227         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1228         opx = &ins->oprs[op1];
1229         opex = 0;                /* For the next iteration */
1230
1231         switch (c) {
1232         case 01:
1233         case 02:
1234         case 03:
1235         case 04:
1236             EMIT_REX();
1237             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1238             codes += c;
1239             offset += c;
1240             break;
1241
1242         case 05:
1243         case 06:
1244         case 07:
1245             opex = c;
1246             break;
1247
1248         case4(010):
1249             EMIT_REX();
1250             bytes[0] = *codes++ + (regval(opx) & 7);
1251             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1252             offset += 1;
1253             break;
1254
1255         case4(014):
1256             /*
1257              * The test for BITS8 and SBYTE here is intended to avoid
1258              * warning on optimizer actions due to SBYTE, while still
1259              * warn on explicit BYTE directives.  Also warn, obviously,
1260              * if the optimizer isn't enabled.
1261              */
1262             if (((opx->type & BITS8) ||
1263                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1264                 (opx->offset < -128 || opx->offset > 127)) {
1265                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1266                         "signed byte value exceeds bounds");
1267             }
1268             if (opx->segment != NO_SEG) {
1269                 data = opx->offset;
1270                 out(offset, segment, &data, OUT_ADDRESS, 1,
1271                     opx->segment, opx->wrt);
1272             } else {
1273                 bytes[0] = opx->offset;
1274                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1275                     NO_SEG);
1276             }
1277             offset += 1;
1278             break;
1279
1280         case4(020):
1281             if (opx->offset < -256 || opx->offset > 255) {
1282                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1283                         "byte value exceeds bounds");
1284             }
1285             if (opx->segment != NO_SEG) {
1286                 data = opx->offset;
1287                 out(offset, segment, &data, OUT_ADDRESS, 1,
1288                     opx->segment, opx->wrt);
1289             } else {
1290                 bytes[0] = opx->offset;
1291                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1292                     NO_SEG);
1293             }
1294             offset += 1;
1295             break;
1296
1297         case4(024):
1298             if (opx->offset < 0 || opx->offset > 255)
1299                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1300                         "unsigned byte value exceeds bounds");
1301             if (opx->segment != NO_SEG) {
1302                 data = opx->offset;
1303                 out(offset, segment, &data, OUT_ADDRESS, 1,
1304                     opx->segment, opx->wrt);
1305             } else {
1306                 bytes[0] = opx->offset;
1307                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1308                     NO_SEG);
1309             }
1310             offset += 1;
1311             break;
1312
1313         case4(030):
1314             warn_overflow_opd(opx, 2);
1315             data = opx->offset;
1316             out(offset, segment, &data, OUT_ADDRESS, 2,
1317                 opx->segment, opx->wrt);
1318             offset += 2;
1319             break;
1320
1321         case4(034):
1322             if (opx->type & (BITS16 | BITS32))
1323                 size = (opx->type & BITS16) ? 2 : 4;
1324             else
1325                 size = (bits == 16) ? 2 : 4;
1326             warn_overflow_opd(opx, size);
1327             data = opx->offset;
1328             out(offset, segment, &data, OUT_ADDRESS, size,
1329                 opx->segment, opx->wrt);
1330             offset += size;
1331             break;
1332
1333         case4(040):
1334             warn_overflow_opd(opx, 4);
1335             data = opx->offset;
1336             out(offset, segment, &data, OUT_ADDRESS, 4,
1337                 opx->segment, opx->wrt);
1338             offset += 4;
1339             break;
1340
1341         case4(044):
1342             data = opx->offset;
1343             size = ins->addr_size >> 3;
1344             warn_overflow_opd(opx, size);
1345             out(offset, segment, &data, OUT_ADDRESS, size,
1346                 opx->segment, opx->wrt);
1347             offset += size;
1348             break;
1349
1350         case4(050):
1351             if (opx->segment != segment) {
1352                 data = opx->offset;
1353                 out(offset, segment, &data,
1354                     OUT_REL1ADR, insn_end - offset,
1355                     opx->segment, opx->wrt);
1356             } else {
1357                 data = opx->offset - insn_end;
1358                 if (data > 127 || data < -128)
1359                     errfunc(ERR_NONFATAL, "short jump is out of range");
1360                 out(offset, segment, &data,
1361                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1362             }
1363             offset += 1;
1364             break;
1365
1366         case4(054):
1367             data = (int64_t)opx->offset;
1368             out(offset, segment, &data, OUT_ADDRESS, 8,
1369                 opx->segment, opx->wrt);
1370             offset += 8;
1371             break;
1372
1373         case4(060):
1374             if (opx->segment != segment) {
1375                 data = opx->offset;
1376                 out(offset, segment, &data,
1377                     OUT_REL2ADR, insn_end - offset,
1378                     opx->segment, opx->wrt);
1379             } else {
1380                 data = opx->offset - insn_end;
1381                 out(offset, segment, &data,
1382                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1383             }
1384             offset += 2;
1385             break;
1386
1387         case4(064):
1388             if (opx->type & (BITS16 | BITS32 | BITS64))
1389                 size = (opx->type & BITS16) ? 2 : 4;
1390             else
1391                 size = (bits == 16) ? 2 : 4;
1392             if (opx->segment != segment) {
1393                 data = opx->offset;
1394                 out(offset, segment, &data,
1395                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1396                     insn_end - offset, opx->segment, opx->wrt);
1397             } else {
1398                 data = opx->offset - insn_end;
1399                 out(offset, segment, &data,
1400                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1401             }
1402             offset += size;
1403             break;
1404
1405         case4(070):
1406             if (opx->segment != segment) {
1407                 data = opx->offset;
1408                 out(offset, segment, &data,
1409                     OUT_REL4ADR, insn_end - offset,
1410                     opx->segment, opx->wrt);
1411             } else {
1412                 data = opx->offset - insn_end;
1413                 out(offset, segment, &data,
1414                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1415             }
1416             offset += 4;
1417             break;
1418
1419         case4(074):
1420             if (opx->segment == NO_SEG)
1421                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1422                         " relocatable");
1423             data = 0;
1424             out(offset, segment, &data, OUT_ADDRESS, 2,
1425                 outfmt->segbase(1 + opx->segment),
1426                 opx->wrt);
1427             offset += 2;
1428             break;
1429
1430         case4(0140):
1431             data = opx->offset;
1432             warn_overflow_opd(opx, 2);
1433             if (is_sbyte16(opx)) {
1434                 bytes[0] = data;
1435                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1436                     NO_SEG);
1437                 offset++;
1438             } else {
1439                 out(offset, segment, &data, OUT_ADDRESS, 2,
1440                     opx->segment, opx->wrt);
1441                 offset += 2;
1442             }
1443             break;
1444
1445         case4(0144):
1446             EMIT_REX();
1447             bytes[0] = *codes++;
1448             if (is_sbyte16(opx))
1449                 bytes[0] |= 2;  /* s-bit */
1450             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1451             offset++;
1452             break;
1453
1454         case4(0150):
1455             data = opx->offset;
1456             warn_overflow_opd(opx, 4);
1457             if (is_sbyte32(opx)) {
1458                 bytes[0] = data;
1459                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1460                     NO_SEG);
1461                 offset++;
1462             } else {
1463                 out(offset, segment, &data, OUT_ADDRESS, 4,
1464                     opx->segment, opx->wrt);
1465                 offset += 4;
1466             }
1467             break;
1468
1469         case4(0154):
1470             EMIT_REX();
1471             bytes[0] = *codes++;
1472             if (is_sbyte32(opx))
1473                 bytes[0] |= 2;  /* s-bit */
1474             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1475             offset++;
1476             break;
1477
1478         case 0172:
1479             c = *codes++;
1480             opx = &ins->oprs[c >> 3];
1481             bytes[0] = nasm_regvals[opx->basereg] << 4;
1482             opx = &ins->oprs[c & 7];
1483             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1484                 errfunc(ERR_NONFATAL,
1485                         "non-absolute expression not permitted as argument %d",
1486                         c & 7);
1487             } else {
1488                 if (opx->offset & ~15) {
1489                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1490                             "four-bit argument exceeds bounds");
1491                 }
1492                 bytes[0] |= opx->offset & 15;
1493             }
1494             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1495             offset++;
1496             break;
1497
1498         case 0173:
1499             c = *codes++;
1500             opx = &ins->oprs[c >> 4];
1501             bytes[0] = nasm_regvals[opx->basereg] << 4;
1502             bytes[0] |= c & 15;
1503             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1504             offset++;
1505             break;
1506
1507         case4(0174):
1508             bytes[0] = nasm_regvals[opx->basereg] << 4;
1509             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1510             offset++;
1511             break;
1512
1513         case4(0250):
1514             data = opx->offset;
1515             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1516                 (int32_t)data != (int64_t)data) {
1517                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1518                         "signed dword immediate exceeds bounds");
1519             }
1520             if (is_sbyte32(opx)) {
1521                 bytes[0] = data;
1522                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1523                     NO_SEG);
1524                 offset++;
1525             } else {
1526                 out(offset, segment, &data, OUT_ADDRESS, 4,
1527                     opx->segment, opx->wrt);
1528                 offset += 4;
1529             }
1530             break;
1531
1532         case4(0254):
1533             data = opx->offset;
1534             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1535                 (int32_t)data != (int64_t)data) {
1536                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1537                         "signed dword immediate exceeds bounds");
1538             }
1539             out(offset, segment, &data, OUT_ADDRESS, 4,
1540                 opx->segment, opx->wrt);
1541             offset += 4;
1542             break;
1543
1544         case4(0260):
1545         case 0270:
1546             codes += 2;
1547             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1548                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1549                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1550                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1551                     ((~ins->vexreg & 15)<< 3) | (ins->vex_wlp & 07);
1552                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1553                 offset += 3;
1554             } else {
1555                 bytes[0] = 0xc5;
1556                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1557                     ((~ins->vexreg & 15) << 3) | (ins->vex_wlp & 07);
1558                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1559                 offset += 2;
1560             }
1561             break;
1562
1563         case4(0274):
1564         {
1565             uint64_t uv, um;
1566             int s;
1567
1568             if (ins->rex & REX_W)
1569                 s = 64;
1570             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1571                 s = 16;
1572             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1573                 s = 32;
1574             else
1575                 s = bits;
1576
1577             um = (uint64_t)2 << (s-1);
1578             uv = opx->offset;
1579
1580             if (uv > 127 && uv < (uint64_t)-128 &&
1581                 (uv < um-128 || uv > um-1)) {
1582                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1583                         "signed byte value exceeds bounds");
1584             }
1585             if (opx->segment != NO_SEG) {
1586                 data = uv;
1587                 out(offset, segment, &data, OUT_ADDRESS, 1,
1588                     opx->segment, opx->wrt);
1589             } else {
1590                 bytes[0] = uv;
1591                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1592                     NO_SEG);
1593             }
1594             offset += 1;
1595             break;
1596         }
1597
1598         case4(0300):
1599             break;
1600
1601         case 0310:
1602             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1603                 *bytes = 0x67;
1604                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1605                 offset += 1;
1606             } else
1607                 offset += 0;
1608             break;
1609
1610         case 0311:
1611             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1612                 *bytes = 0x67;
1613                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1614                 offset += 1;
1615             } else
1616                 offset += 0;
1617             break;
1618
1619         case 0312:
1620             break;
1621
1622         case 0313:
1623             ins->rex = 0;
1624             break;
1625
1626         case4(0314):
1627             break;
1628
1629         case 0320:
1630         case 0321:
1631             break;
1632
1633         case 0322:
1634         case 0323:
1635             break;
1636
1637         case 0324:
1638             ins->rex |= REX_W;
1639             break;
1640
1641         case 0325:
1642             break;
1643
1644         case 0330:
1645             *bytes = *codes++ ^ condval[ins->condition];
1646             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1647             offset += 1;
1648             break;
1649
1650         case 0331:
1651             break;
1652
1653         case 0332:
1654         case 0333:
1655             *bytes = c - 0332 + 0xF2;
1656             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1657             offset += 1;
1658             break;
1659
1660         case 0334:
1661             if (ins->rex & REX_R) {
1662                 *bytes = 0xF0;
1663                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1664                 offset += 1;
1665             }
1666             ins->rex &= ~(REX_L|REX_R);
1667             break;
1668
1669         case 0335:
1670             break;
1671
1672         case 0336:
1673         case 0337:
1674             break;
1675
1676         case 0340:
1677             if (ins->oprs[0].segment != NO_SEG)
1678                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1679             else {
1680                 int64_t size = ins->oprs[0].offset;
1681                 if (size > 0)
1682                     out(offset, segment, NULL,
1683                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1684                 offset += size;
1685             }
1686             break;
1687
1688         case 0341:
1689             break;
1690
1691         case 0344:
1692         case 0345:
1693             bytes[0] = c & 1;
1694             switch (ins->oprs[0].basereg) {
1695             case R_CS:
1696                 bytes[0] += 0x0E;
1697                 break;
1698             case R_DS:
1699                 bytes[0] += 0x1E;
1700                 break;
1701             case R_ES:
1702                 bytes[0] += 0x06;
1703                 break;
1704             case R_SS:
1705                 bytes[0] += 0x16;
1706                 break;
1707             default:
1708                 errfunc(ERR_PANIC,
1709                         "bizarre 8086 segment register received");
1710             }
1711             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1712             offset++;
1713             break;
1714
1715         case 0346:
1716         case 0347:
1717             bytes[0] = c & 1;
1718             switch (ins->oprs[0].basereg) {
1719             case R_FS:
1720                 bytes[0] += 0xA0;
1721                 break;
1722             case R_GS:
1723                 bytes[0] += 0xA8;
1724                 break;
1725             default:
1726                 errfunc(ERR_PANIC,
1727                         "bizarre 386 segment register received");
1728             }
1729             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1730             offset++;
1731             break;
1732
1733         case 0360:
1734             break;
1735
1736         case 0361:
1737             bytes[0] = 0x66;
1738             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1739             offset += 1;
1740             break;
1741
1742         case 0362:
1743         case 0363:
1744             bytes[0] = c - 0362 + 0xf2;
1745             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1746             offset += 1;
1747             break;
1748
1749         case 0364:
1750         case 0365:
1751             break;
1752
1753         case 0366:
1754         case 0367:
1755             *bytes = c - 0366 + 0x66;
1756             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1757             offset += 1;
1758             break;
1759
1760         case 0370:
1761         case 0371:
1762         case 0372:
1763             break;
1764
1765         case 0373:
1766             *bytes = bits == 16 ? 3 : 5;
1767             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1768             offset += 1;
1769             break;
1770
1771         case 0374:
1772             eat = EA_XMMVSIB;
1773             break;
1774
1775         case 0375:
1776             eat = EA_YMMVSIB;
1777             break;
1778
1779         case4(0100):
1780         case4(0110):
1781         case4(0120):
1782         case4(0130):
1783         case4(0200):
1784         case4(0204):
1785         case4(0210):
1786         case4(0214):
1787         case4(0220):
1788         case4(0224):
1789         case4(0230):
1790         case4(0234):
1791             {
1792                 ea ea_data;
1793                 int rfield;
1794                 opflags_t rflags;
1795                 uint8_t *p;
1796                 int32_t s;
1797                 struct operand *opy = &ins->oprs[op2];
1798
1799                 if (c <= 0177) {
1800                     /* pick rfield from operand b (opx) */
1801                     rflags = regflag(opx);
1802                     rfield = nasm_regvals[opx->basereg];
1803                 } else {
1804                     /* rfield is constant */
1805                     rflags = 0;
1806                     rfield = c & 7;
1807                 }
1808
1809                 if (process_ea(opy, &ea_data, bits, ins->addr_size,
1810                                rfield, rflags) != eat)
1811                     errfunc(ERR_NONFATAL, "invalid effective address");
1812
1813                 p = bytes;
1814                 *p++ = ea_data.modrm;
1815                 if (ea_data.sib_present)
1816                     *p++ = ea_data.sib;
1817
1818                 s = p - bytes;
1819                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1820
1821                 /*
1822                  * Make sure the address gets the right offset in case
1823                  * the line breaks in the .lst file (BR 1197827)
1824                  */
1825                 offset += s;
1826                 s = 0;
1827
1828                 switch (ea_data.bytes) {
1829                 case 0:
1830                     break;
1831                 case 1:
1832                 case 2:
1833                 case 4:
1834                 case 8:
1835                     data = opy->offset;
1836                     s += ea_data.bytes;
1837                     if (ea_data.rip) {
1838                         if (opy->segment == segment) {
1839                             data -= insn_end;
1840                             if (overflow_signed(data, ea_data.bytes))
1841                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1842                             out(offset, segment, &data, OUT_ADDRESS,
1843                                 ea_data.bytes, NO_SEG, NO_SEG);
1844                         } else {
1845                             /* overflow check in output/linker? */
1846                             out(offset, segment, &data,        OUT_REL4ADR,
1847                                 insn_end - offset, opy->segment, opy->wrt);
1848                         }
1849                     } else {
1850                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1851                             signed_bits(opy->offset, ins->addr_size) !=
1852                             signed_bits(opy->offset, ea_data.bytes * 8))
1853                             warn_overflow(ERR_PASS2, ea_data.bytes);
1854
1855                         out(offset, segment, &data, OUT_ADDRESS,
1856                             ea_data.bytes, opy->segment, opy->wrt);
1857                     }
1858                     break;
1859                 default:
1860                     /* Impossible! */
1861                     errfunc(ERR_PANIC,
1862                             "Invalid amount of bytes (%d) for offset?!",
1863                             ea_data.bytes);
1864                     break;
1865                 }
1866                 offset += s;
1867             }
1868             break;
1869
1870         default:
1871             errfunc(ERR_PANIC, "internal instruction table corrupt"
1872                     ": instruction code \\%o (0x%02X) given", c, c);
1873             break;
1874         }
1875     }
1876 }
1877
1878 static opflags_t regflag(const operand * o)
1879 {
1880     if (!is_register(o->basereg))
1881         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1882     return nasm_reg_flags[o->basereg];
1883 }
1884
1885 static int32_t regval(const operand * o)
1886 {
1887     if (!is_register(o->basereg))
1888         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1889     return nasm_regvals[o->basereg];
1890 }
1891
1892 static int op_rexflags(const operand * o, int mask)
1893 {
1894     opflags_t flags;
1895     int val;
1896
1897     if (!is_register(o->basereg))
1898         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1899
1900     flags = nasm_reg_flags[o->basereg];
1901     val = nasm_regvals[o->basereg];
1902
1903     return rexflags(val, flags, mask);
1904 }
1905
1906 static int rexflags(int val, opflags_t flags, int mask)
1907 {
1908     int rex = 0;
1909
1910     if (val >= 8)
1911         rex |= REX_B|REX_X|REX_R;
1912     if (flags & BITS64)
1913         rex |= REX_W;
1914     if (!(REG_HIGH & ~flags))                   /* AH, CH, DH, BH */
1915         rex |= REX_H;
1916     else if (!(REG8 & ~flags) && val >= 4)      /* SPL, BPL, SIL, DIL */
1917         rex |= REX_P;
1918
1919     return rex & mask;
1920 }
1921
1922 static enum match_result find_match(const struct itemplate **tempp,
1923                                     insn *instruction,
1924                                     int32_t segment, int64_t offset, int bits)
1925 {
1926     const struct itemplate *temp;
1927     enum match_result m, merr;
1928     opflags_t xsizeflags[MAX_OPERANDS];
1929     bool opsizemissing = false;
1930     int i;
1931
1932     for (i = 0; i < instruction->operands; i++)
1933         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1934
1935     merr = MERR_INVALOP;
1936
1937     for (temp = nasm_instructions[instruction->opcode];
1938          temp->opcode != I_none; temp++) {
1939         m = matches(temp, instruction, bits);
1940         if (m == MOK_JUMP) {
1941             if (jmp_match(segment, offset, bits, instruction, temp->code))
1942                 m = MOK_GOOD;
1943             else
1944                 m = MERR_INVALOP;
1945         } else if (m == MERR_OPSIZEMISSING &&
1946                    (temp->flags & IF_SMASK) != IF_SX) {
1947             /*
1948              * Missing operand size and a candidate for fuzzy matching...
1949              */
1950             for (i = 0; i < temp->operands; i++) {
1951                 if ((temp->opd[i] & SAME_AS) == 0)
1952                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1953             }
1954             opsizemissing = true;
1955         }
1956         if (m > merr)
1957             merr = m;
1958         if (merr == MOK_GOOD)
1959             goto done;
1960     }
1961
1962     /* No match, but see if we can get a fuzzy operand size match... */
1963     if (!opsizemissing)
1964         goto done;
1965
1966     for (i = 0; i < instruction->operands; i++) {
1967         /*
1968          * We ignore extrinsic operand sizes on registers, so we should
1969          * never try to fuzzy-match on them.  This also resolves the case
1970          * when we have e.g. "xmmrm128" in two different positions.
1971          */
1972         if (is_class(REGISTER, instruction->oprs[i].type))
1973             continue;
1974
1975         /* This tests if xsizeflags[i] has more than one bit set */
1976         if ((xsizeflags[i] & (xsizeflags[i]-1)))
1977             goto done;                /* No luck */
1978
1979         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
1980     }
1981
1982     /* Try matching again... */
1983     for (temp = nasm_instructions[instruction->opcode];
1984          temp->opcode != I_none; temp++) {
1985         m = matches(temp, instruction, bits);
1986         if (m == MOK_JUMP) {
1987             if (jmp_match(segment, offset, bits, instruction, temp->code))
1988                 m = MOK_GOOD;
1989             else
1990                 m = MERR_INVALOP;
1991         }
1992         if (m > merr)
1993             merr = m;
1994         if (merr == MOK_GOOD)
1995             goto done;
1996     }
1997
1998 done:
1999     *tempp = temp;
2000     return merr;
2001 }
2002
2003 static enum match_result matches(const struct itemplate *itemp,
2004                                  insn *instruction, int bits)
2005 {
2006     int i, size[MAX_OPERANDS], asize, oprs;
2007     bool opsizemissing = false;
2008
2009     /*
2010      * Check the opcode
2011      */
2012     if (itemp->opcode != instruction->opcode)
2013         return MERR_INVALOP;
2014
2015     /*
2016      * Count the operands
2017      */
2018     if (itemp->operands != instruction->operands)
2019         return MERR_INVALOP;
2020
2021     /*
2022      * Is it legal?
2023      */
2024     if (!(optimizing > 0) && (itemp->flags & IF_OPT))
2025         return MERR_INVALOP;
2026
2027     /*
2028      * Check that no spurious colons or TOs are present
2029      */
2030     for (i = 0; i < itemp->operands; i++)
2031         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2032             return MERR_INVALOP;
2033
2034     /*
2035      * Process size flags
2036      */
2037     switch (itemp->flags & IF_SMASK) {
2038     case IF_SB:
2039         asize = BITS8;
2040         break;
2041     case IF_SW:
2042         asize = BITS16;
2043         break;
2044     case IF_SD:
2045         asize = BITS32;
2046         break;
2047     case IF_SQ:
2048         asize = BITS64;
2049         break;
2050     case IF_SO:
2051         asize = BITS128;
2052         break;
2053     case IF_SY:
2054         asize = BITS256;
2055         break;
2056     case IF_SZ:
2057         switch (bits) {
2058         case 16:
2059             asize = BITS16;
2060             break;
2061         case 32:
2062             asize = BITS32;
2063             break;
2064         case 64:
2065             asize = BITS64;
2066             break;
2067         default:
2068             asize = 0;
2069             break;
2070         }
2071         break;
2072     default:
2073         asize = 0;
2074         break;
2075     }
2076
2077     if (itemp->flags & IF_ARMASK) {
2078         /* S- flags only apply to a specific operand */
2079         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2080         memset(size, 0, sizeof size);
2081         size[i] = asize;
2082     } else {
2083         /* S- flags apply to all operands */
2084         for (i = 0; i < MAX_OPERANDS; i++)
2085             size[i] = asize;
2086     }
2087
2088     /*
2089      * Check that the operand flags all match up,
2090      * it's a bit tricky so lets be verbose:
2091      *
2092      * 1) Find out the size of operand. If instruction
2093      *    doesn't have one specified -- we're trying to
2094      *    guess it either from template (IF_S* flag) or
2095      *    from code bits.
2096      *
2097      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2098      *    (ie the same operand as was specified somewhere in template, and
2099      *    this referred operand index is being achieved via ~SAME_AS)
2100      *    we are to be sure that both registers (in template and instruction)
2101      *    do exactly match.
2102      *
2103      * 3) If template operand do not match the instruction OR
2104      *    template has an operand size specified AND this size differ
2105      *    from which instruction has (perhaps we got it from code bits)
2106      *    we are:
2107      *      a)  Check that only size of instruction and operand is differ
2108      *          other characteristics do match
2109      *      b)  Perhaps it's a register specified in instruction so
2110      *          for such a case we just mark that operand as "size
2111      *          missing" and this will turn on fuzzy operand size
2112      *          logic facility (handled by a caller)
2113      */
2114     for (i = 0; i < itemp->operands; i++) {
2115         opflags_t type = instruction->oprs[i].type;
2116         if (!(type & SIZE_MASK))
2117             type |= size[i];
2118
2119         if (itemp->opd[i] & SAME_AS) {
2120             int j = itemp->opd[i] & ~SAME_AS;
2121             if (type != instruction->oprs[j].type ||
2122                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2123                 return MERR_INVALOP;
2124         } else if (itemp->opd[i] & ~type ||
2125             ((itemp->opd[i] & SIZE_MASK) &&
2126              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2127             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2128                 return MERR_INVALOP;
2129             } else if (!is_class(REGISTER, type)) {
2130                 /*
2131                  * Note: we don't honor extrinsic operand sizes for registers,
2132                  * so "missing operand size" for a register should be
2133                  * considered a wildcard match rather than an error.
2134                  */
2135                 opsizemissing = true;
2136             }
2137         }
2138     }
2139
2140     if (opsizemissing)
2141         return MERR_OPSIZEMISSING;
2142
2143     /*
2144      * Check operand sizes
2145      */
2146     if (itemp->flags & (IF_SM | IF_SM2)) {
2147         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2148         for (i = 0; i < oprs; i++) {
2149             asize = itemp->opd[i] & SIZE_MASK;
2150             if (asize) {
2151                 for (i = 0; i < oprs; i++)
2152                     size[i] = asize;
2153                 break;
2154             }
2155         }
2156     } else {
2157         oprs = itemp->operands;
2158     }
2159
2160     for (i = 0; i < itemp->operands; i++) {
2161         if (!(itemp->opd[i] & SIZE_MASK) &&
2162             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2163             return MERR_OPSIZEMISMATCH;
2164     }
2165
2166     /*
2167      * Check template is okay at the set cpu level
2168      */
2169     if (((itemp->flags & IF_PLEVEL) > cpu))
2170         return MERR_BADCPU;
2171
2172     /*
2173      * Verify the appropriate long mode flag.
2174      */
2175     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2176         return MERR_BADMODE;
2177
2178     /*
2179      * Check if special handling needed for Jumps
2180      */
2181     if ((itemp->code[0] & 0374) == 0370)
2182         return MOK_JUMP;
2183
2184     return MOK_GOOD;
2185 }
2186
2187 static enum ea_type process_ea(operand *input, ea *output, int bits,
2188                                int addrbits, int rfield, opflags_t rflags)
2189 {
2190     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2191
2192     output->type    = EA_SCALAR;
2193     output->rip     = false;
2194
2195     /* REX flags for the rfield operand */
2196     output->rex     |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
2197
2198     if (is_class(REGISTER, input->type)) {
2199         /*
2200          * It's a direct register.
2201          */
2202         opflags_t f;
2203
2204         if (!is_register(input->basereg))
2205             goto err;
2206
2207         f = regflag(input);
2208
2209         if (!is_class(REG_EA, f))
2210             goto err;
2211
2212         output->rex         |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
2213         output->sib_present = false;    /* no SIB necessary */
2214         output->bytes       = 0;        /* no offset necessary either */
2215         output->modrm       = GEN_MODRM(3, rfield, nasm_regvals[input->basereg]);
2216     } else {
2217         /*
2218          * It's a memory reference.
2219          */
2220         if (input->basereg == -1 &&
2221             (input->indexreg == -1 || input->scale == 0)) {
2222             /*
2223              * It's a pure offset.
2224              */
2225             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2226                 input->segment == NO_SEG) {
2227                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2228                 input->type &= ~IP_REL;
2229                 input->type |= MEMORY;
2230             }
2231
2232             if (input->eaflags & EAF_BYTEOFFS ||
2233                 (input->eaflags & EAF_WORDOFFS &&
2234                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2235                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2236             }
2237
2238             if (bits == 64 && (~input->type & IP_REL)) {
2239                 output->sib_present = true;
2240                 output->sib         = GEN_SIB(0, 4, 5);
2241                 output->bytes       = 4;
2242                 output->modrm       = GEN_MODRM(0, rfield, 4);
2243                 output->rip         = false;
2244             } else {
2245                 output->sib_present = false;
2246                 output->bytes       = (addrbits != 16 ? 4 : 2);
2247                 output->modrm       = GEN_MODRM(0, rfield, (addrbits != 16 ? 5 : 6));
2248                 output->rip         = bits == 64;
2249             }
2250         } else {
2251             /*
2252              * It's an indirection.
2253              */
2254             int i = input->indexreg, b = input->basereg, s = input->scale;
2255             int32_t seg = input->segment;
2256             int hb = input->hintbase, ht = input->hinttype;
2257             int t, it, bt;              /* register numbers */
2258             opflags_t x, ix, bx;        /* register flags */
2259
2260             if (s == 0)
2261                 i = -1;         /* make this easy, at least */
2262
2263             if (is_register(i)) {
2264                 it = nasm_regvals[i];
2265                 ix = nasm_reg_flags[i];
2266             } else {
2267                 it = -1;
2268                 ix = 0;
2269             }
2270
2271             if (is_register(b)) {
2272                 bt = nasm_regvals[b];
2273                 bx = nasm_reg_flags[b];
2274             } else {
2275                 bt = -1;
2276                 bx = 0;
2277             }
2278
2279             /* if either one are a vector register... */
2280             if ((ix|bx) & (XMMREG|YMMREG) & ~REG_EA) {
2281                 int32_t sok = BITS32 | BITS64;
2282                 int32_t o = input->offset;
2283                 int mod, scale, index, base;
2284
2285                 /*
2286                  * For a vector SIB, one has to be a vector and the other,
2287                  * if present, a GPR.  The vector must be the index operand.
2288                  */
2289                 if (it == -1 || (bx & (XMMREG|YMMREG) & ~REG_EA)) {
2290                     if (s == 0)
2291                         s = 1;
2292                     else if (s != 1)
2293                         goto err;
2294
2295                     t = bt, bt = it, it = t;
2296                     x = bx, bx = ix, ix = x;
2297                 }
2298
2299                 if (bt != -1) {
2300                     if (REG_GPR & ~bx)
2301                         goto err;
2302                     if (!(REG64 & ~bx) || !(REG32 & ~bx))
2303                         sok &= bx;
2304                     else
2305                         goto err;
2306                 }
2307
2308                 /*
2309                  * While we're here, ensure the user didn't specify
2310                  * WORD or QWORD
2311                  */
2312                 if (input->disp_size == 16 || input->disp_size == 64)
2313                     goto err;
2314
2315                 if (addrbits == 16 ||
2316                     (addrbits == 32 && !(sok & BITS32)) ||
2317                     (addrbits == 64 && !(sok & BITS64)))
2318                     goto err;
2319
2320                 output->type = (ix & YMMREG & ~REG_EA)
2321                     ? EA_YMMVSIB : EA_XMMVSIB;
2322
2323                 output->rex |= rexflags(it, ix, REX_X);
2324                 output->rex |= rexflags(bt, bx, REX_B);
2325
2326                 index = it & 7; /* it is known to be != -1 */
2327
2328                 switch (s) {
2329                 case 1:
2330                     scale = 0;
2331                     break;
2332                 case 2:
2333                     scale = 1;
2334                     break;
2335                 case 4:
2336                     scale = 2;
2337                     break;
2338                 case 8:
2339                     scale = 3;
2340                     break;
2341                 default:   /* then what the smeg is it? */
2342                     goto err;    /* panic */
2343                 }
2344
2345                 if (bt == -1) {
2346                     base = 5;
2347                     mod = 0;
2348                 } else {
2349                     base = (bt & 7);
2350                     if (base != REG_NUM_EBP && o == 0 &&
2351                         seg == NO_SEG && !forw_ref &&
2352                         !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2353                         mod = 0;
2354                     else if (input->eaflags & EAF_BYTEOFFS ||
2355                              (o >= -128 && o <= 127 &&
2356                               seg == NO_SEG && !forw_ref &&
2357                               !(input->eaflags & EAF_WORDOFFS)))
2358                         mod = 1;
2359                     else
2360                         mod = 2;
2361                 }
2362
2363                 output->sib_present = true;
2364                 output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2365                 output->modrm       = GEN_MODRM(mod, rfield, 4);
2366                 output->sib         = GEN_SIB(scale, index, base);
2367             } else if ((ix|bx) & (BITS32|BITS64)) {
2368                 /*
2369                  * it must be a 32/64-bit memory reference. Firstly we have
2370                  * to check that all registers involved are type E/Rxx.
2371                  */
2372                 int32_t sok = BITS32 | BITS64;
2373                 int32_t o = input->offset;
2374
2375                 if (it != -1) {
2376                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2377                         sok &= ix;
2378                     else
2379                         goto err;
2380                 }
2381
2382                 if (bt != -1) {
2383                     if (REG_GPR & ~bx)
2384                         goto err; /* Invalid register */
2385                     if (~sok & bx & SIZE_MASK)
2386                         goto err; /* Invalid size */
2387                     sok &= bx;
2388                 }
2389
2390                 /*
2391                  * While we're here, ensure the user didn't specify
2392                  * WORD or QWORD
2393                  */
2394                 if (input->disp_size == 16 || input->disp_size == 64)
2395                     goto err;
2396
2397                 if (addrbits == 16 ||
2398                     (addrbits == 32 && !(sok & BITS32)) ||
2399                     (addrbits == 64 && !(sok & BITS64)))
2400                     goto err;
2401
2402                 /* now reorganize base/index */
2403                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2404                     ((hb == b && ht == EAH_NOTBASE) ||
2405                      (hb == i && ht == EAH_MAKEBASE))) {
2406                     /* swap if hints say so */
2407                     t = bt, bt = it, it = t;
2408                     x = bx, bx = ix, ix = x;
2409                 }
2410                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2411                     bt = -1, bx = 0, s++;
2412                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2413                     /* make single reg base, unless hint */
2414                     bt = it, bx = ix, it = -1, ix = 0;
2415                 }
2416                 if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
2417                       s == 3 || s == 5 || s == 9) && bt == -1)
2418                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2419                 if (it == -1 && (bt & 7) != REG_NUM_ESP &&
2420                     (input->eaflags & EAF_TIMESTWO))
2421                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2422                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2423                 if (s == 1 && it == REG_NUM_ESP) {
2424                     /* swap ESP into base if scale is 1 */
2425                     t = it, it = bt, bt = t;
2426                     x = ix, ix = bx, bx = x;
2427                 }
2428                 if (it == REG_NUM_ESP ||
2429                     (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2430                     goto err;        /* wrong, for various reasons */
2431
2432                 output->rex |= rexflags(it, ix, REX_X);
2433                 output->rex |= rexflags(bt, bx, REX_B);
2434
2435                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2436                     /* no SIB needed */
2437                     int mod, rm;
2438
2439                     if (bt == -1) {
2440                         rm = 5;
2441                         mod = 0;
2442                     } else {
2443                         rm = (bt & 7);
2444                         if (rm != REG_NUM_EBP && o == 0 &&
2445                             seg == NO_SEG && !forw_ref &&
2446                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2447                             mod = 0;
2448                         else if (input->eaflags & EAF_BYTEOFFS ||
2449                                  (o >= -128 && o <= 127 &&
2450                                   seg == NO_SEG && !forw_ref &&
2451                                   !(input->eaflags & EAF_WORDOFFS)))
2452                             mod = 1;
2453                         else
2454                             mod = 2;
2455                     }
2456
2457                     output->sib_present = false;
2458                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2459                     output->modrm       = GEN_MODRM(mod, rfield, rm);
2460                 } else {
2461                     /* we need a SIB */
2462                     int mod, scale, index, base;
2463
2464                     if (it == -1)
2465                         index = 4, s = 1;
2466                     else
2467                         index = (it & 7);
2468
2469                     switch (s) {
2470                     case 1:
2471                         scale = 0;
2472                         break;
2473                     case 2:
2474                         scale = 1;
2475                         break;
2476                     case 4:
2477                         scale = 2;
2478                         break;
2479                     case 8:
2480                         scale = 3;
2481                         break;
2482                     default:   /* then what the smeg is it? */
2483                         goto err;    /* panic */
2484                     }
2485
2486                     if (bt == -1) {
2487                         base = 5;
2488                         mod = 0;
2489                     } else {
2490                         base = (bt & 7);
2491                         if (base != REG_NUM_EBP && o == 0 &&
2492                             seg == NO_SEG && !forw_ref &&
2493                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2494                             mod = 0;
2495                         else if (input->eaflags & EAF_BYTEOFFS ||
2496                                  (o >= -128 && o <= 127 &&
2497                                   seg == NO_SEG && !forw_ref &&
2498                                   !(input->eaflags & EAF_WORDOFFS)))
2499                             mod = 1;
2500                         else
2501                             mod = 2;
2502                     }
2503
2504                     output->sib_present = true;
2505                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2506                     output->modrm       = GEN_MODRM(mod, rfield, 4);
2507                     output->sib         = GEN_SIB(scale, index, base);
2508                 }
2509             } else {            /* it's 16-bit */
2510                 int mod, rm;
2511                 int16_t o = input->offset;
2512
2513                 /* check for 64-bit long mode */
2514                 if (addrbits == 64)
2515                     goto err;
2516
2517                 /* check all registers are BX, BP, SI or DI */
2518                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
2519                     (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
2520                     goto err;
2521
2522                 /* ensure the user didn't specify DWORD/QWORD */
2523                 if (input->disp_size == 32 || input->disp_size == 64)
2524                     goto err;
2525
2526                 if (s != 1 && i != -1)
2527                     goto err;        /* no can do, in 16-bit EA */
2528                 if (b == -1 && i != -1) {
2529                     int tmp = b;
2530                     b = i;
2531                     i = tmp;
2532                 }               /* swap */
2533                 if ((b == R_SI || b == R_DI) && i != -1) {
2534                     int tmp = b;
2535                     b = i;
2536                     i = tmp;
2537                 }
2538                 /* have BX/BP as base, SI/DI index */
2539                 if (b == i)
2540                     goto err;        /* shouldn't ever happen, in theory */
2541                 if (i != -1 && b != -1 &&
2542                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2543                     goto err;        /* invalid combinations */
2544                 if (b == -1)            /* pure offset: handled above */
2545                     goto err;        /* so if it gets to here, panic! */
2546
2547                 rm = -1;
2548                 if (i != -1)
2549                     switch (i * 256 + b) {
2550                     case R_SI * 256 + R_BX:
2551                         rm = 0;
2552                         break;
2553                     case R_DI * 256 + R_BX:
2554                         rm = 1;
2555                         break;
2556                     case R_SI * 256 + R_BP:
2557                         rm = 2;
2558                         break;
2559                     case R_DI * 256 + R_BP:
2560                         rm = 3;
2561                         break;
2562                 } else
2563                     switch (b) {
2564                     case R_SI:
2565                         rm = 4;
2566                         break;
2567                     case R_DI:
2568                         rm = 5;
2569                         break;
2570                     case R_BP:
2571                         rm = 6;
2572                         break;
2573                     case R_BX:
2574                         rm = 7;
2575                         break;
2576                     }
2577                 if (rm == -1)           /* can't happen, in theory */
2578                     goto err;        /* so panic if it does */
2579
2580                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2581                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2582                     mod = 0;
2583                 else if (input->eaflags & EAF_BYTEOFFS ||
2584                          (o >= -128 && o <= 127 && seg == NO_SEG &&
2585                           !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
2586                     mod = 1;
2587                 else
2588                     mod = 2;
2589
2590                 output->sib_present = false;    /* no SIB - it's 16-bit */
2591                 output->bytes       = mod;      /* bytes of offset needed */
2592                 output->modrm       = GEN_MODRM(mod, rfield, rm);
2593             }
2594         }
2595     }
2596
2597     output->size = 1 + output->sib_present + output->bytes;
2598     return output->type;
2599
2600 err:
2601     return output->type = EA_INVALID;
2602 }
2603
2604 static void add_asp(insn *ins, int addrbits)
2605 {
2606     int j, valid;
2607     int defdisp;
2608
2609     valid = (addrbits == 64) ? 64|32 : 32|16;
2610
2611     switch (ins->prefixes[PPS_ASIZE]) {
2612     case P_A16:
2613         valid &= 16;
2614         break;
2615     case P_A32:
2616         valid &= 32;
2617         break;
2618     case P_A64:
2619         valid &= 64;
2620         break;
2621     case P_ASP:
2622         valid &= (addrbits == 32) ? 16 : 32;
2623         break;
2624     default:
2625         break;
2626     }
2627
2628     for (j = 0; j < ins->operands; j++) {
2629         if (is_class(MEMORY, ins->oprs[j].type)) {
2630             opflags_t i, b;
2631
2632             /* Verify as Register */
2633             if (!is_register(ins->oprs[j].indexreg))
2634                 i = 0;
2635             else
2636                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2637
2638             /* Verify as Register */
2639             if (!is_register(ins->oprs[j].basereg))
2640                 b = 0;
2641             else
2642                 b = nasm_reg_flags[ins->oprs[j].basereg];
2643
2644             if (ins->oprs[j].scale == 0)
2645                 i = 0;
2646
2647             if (!i && !b) {
2648                 int ds = ins->oprs[j].disp_size;
2649                 if ((addrbits != 64 && ds > 8) ||
2650                     (addrbits == 64 && ds == 16))
2651                     valid &= ds;
2652             } else {
2653                 if (!(REG16 & ~b))
2654                     valid &= 16;
2655                 if (!(REG32 & ~b))
2656                     valid &= 32;
2657                 if (!(REG64 & ~b))
2658                     valid &= 64;
2659
2660                 if (!(REG16 & ~i))
2661                     valid &= 16;
2662                 if (!(REG32 & ~i))
2663                     valid &= 32;
2664                 if (!(REG64 & ~i))
2665                     valid &= 64;
2666             }
2667         }
2668     }
2669
2670     if (valid & addrbits) {
2671         ins->addr_size = addrbits;
2672     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2673         /* Add an address size prefix */
2674         ins->prefixes[PPS_ASIZE] = (addrbits == 32) ? P_A16 : P_A32;;
2675         ins->addr_size = (addrbits == 32) ? 16 : 32;
2676     } else {
2677         /* Impossible... */
2678         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2679         ins->addr_size = addrbits; /* Error recovery */
2680     }
2681
2682     defdisp = ins->addr_size == 16 ? 16 : 32;
2683
2684     for (j = 0; j < ins->operands; j++) {
2685         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2686             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
2687             /*
2688              * mem_offs sizes must match the address size; if not,
2689              * strip the MEM_OFFS bit and match only EA instructions
2690              */
2691             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2692         }
2693     }
2694 }