assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2010 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 0ww lpp
  96  *                 [w0] ww = 0 for W = 0
  97  *                 [w1] ww = 1 for W = 1
  98  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  99  *                 [ww] ww = 3 for W used as REX.W
 100  *
 101  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 102  *
 103  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 104  *                 which is to be extended to the operand size.
 105  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 106  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 107  * \312          - (disassembler only) invalid with non-default address size.
 108  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 109  * \314          - (disassembler only) invalid with REX.B
 110  * \315          - (disassembler only) invalid with REX.X
 111  * \316          - (disassembler only) invalid with REX.R
 112  * \317          - (disassembler only) invalid with REX.W
 113  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 114  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 115  * \322          - indicates that this instruction is only valid when the
 116  *                 operand size is the default (instruction to disassembler,
 117  *                 generates no code in the assembler)
 118  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 119  * \324          - indicates 64-bit operand size requiring REX prefix.
 120  * \325          - instruction which always uses spl/bpl/sil/dil
 121  * \330          - a literal byte follows in the code stream, to be added
 122  *                 to the condition code value of the instruction.
 123  * \331          - instruction not valid with REP prefix.  Hint for
 124  *                 disassembler only; for SSE instructions.
 125  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 126  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 127  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 128  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  129  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  130  * \337          - force a REPNE prefix (0xF2) even if not specified.
 131  *                 \336-\337 are still listed as prefixes in the disassembler.
 132  * \340          - reserve <operand 0> bytes of uninitialized storage.
 133  *                 Operand 0 had better be a segmentless constant.
 134  * \341          - this instruction needs a WAIT "prefix"
 135  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 136  *                 (POP is never used for CS) depending on operand 0
 137  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 138  *                 on operand 0
 139  * \360          - no SSE prefix (== \364\331)
 140  * \361          - 66 SSE prefix (== \366\331)
 141  * \362          - F2 SSE prefix (== \364\332)
 142  * \363          - F3 SSE prefix (== \364\333)
 143  * \364          - operand-size prefix (0x66) not permitted
 144  * \365          - address-size prefix (0x67) not permitted
 145  * \366          - operand-size prefix (0x66) used as opcode extension
 146  * \367          - address-size prefix (0x67) used as opcode extension
 147  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 148  *                 370 is used for Jcc, 371 is used for JMP.
 149  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 150  *                 used for conditional jump over longer jump
 151  */
 152
 153 #include "compiler.h"
 154
 155 #include <stdio.h>
 156 #include <string.h>
 157 #include <inttypes.h>
 158
 159 #include "nasm.h"
 160 #include "nasmlib.h"
 161 #include "assemble.h"
 162 #include "insns.h"
 163 #include "tables.h"
 164
 165 enum match_result {
 166     /*
 167      * Matching errors.  These should be sorted so that more specific
 168      * errors come later in the sequence.
 169      */
 170     MERR_INVALOP,
 171     MERR_OPSIZEMISSING,
 172     MERR_OPSIZEMISMATCH,
 173     MERR_BADCPU,
 174     MERR_BADMODE,
 175     /*
 176      * Matching success; the conditional ones first
 177      */
 178     MOK_JUMP,                   /* Matching OK but needs jmp_match() */
 179     MOK_GOOD                    /* Matching unconditionally OK */
 180 };
 181
 182 typedef struct {
 183     int sib_present;                 /* is a SIB byte necessary? */
 184     int bytes;                       /* # of bytes of offset needed */
 185     int size;                        /* lazy - this is sib+bytes+1 */
 186     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 187 } ea;
 188
 189 static uint32_t cpu;            /* cpu level received from nasm.c */
 190 static efunc errfunc;
 191 static struct ofmt *outfmt;
 192 static ListGen *list;
 193
 194 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 195 static void gencode(int32_t segment, int64_t offset, int bits,
 196                     insn * ins, const struct itemplate *temp,
 197                     int64_t insn_end);
 198 static enum match_result find_match(const struct itemplate **tempp,
 199                                     insn *instruction,
 200                                     int32_t segment, int64_t offset, int bits);
 201 static enum match_result matches(const struct itemplate *, insn *, int bits);
 202 static opflags_t regflag(const operand *);
 203 static int32_t regval(const operand *);
 204 static int rexflags(int, opflags_t, int);
 205 static int op_rexflags(const operand *, int);
 206 static ea *process_ea(operand *, ea *, int, int, int, opflags_t);
 207 static void add_asp(insn *, int);
 208
 209 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 210 {
 211     return ins->prefixes[pos] == prefix;
 212 }
 213
 214 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 215 {
 216     if (ins->prefixes[pos])
 217         errfunc(ERR_NONFATAL, "invalid %s prefix",
 218                 prefix_name(ins->prefixes[pos]));
 219 }
 220
 221 static const char *size_name(int size)
 222 {
 223     switch (size) {
 224     case 1:
 225         return "byte";
 226     case 2:
 227         return "word";
 228     case 4:
 229         return "dword";
 230     case 8:
 231         return "qword";
 232     case 10:
 233         return "tword";
 234     case 16:
 235         return "oword";
 236     case 32:
 237         return "yword";
 238     default:
 239         return "???";
 240     }
 241 }
 242
 243 static void warn_overflow(int pass, int size)
 244 {
 245     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 246             "%s data exceeds bounds", size_name(size));
 247 }
 248
 249 static void warn_overflow_const(int64_t data, int size)
 250 {
 251     if (overflow_general(data, size))
 252         warn_overflow(ERR_PASS1, size);
 253 }
 254
 255 static void warn_overflow_opd(const struct operand *o, int size)
 256 {
 257     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 258         if (overflow_general(o->offset, size))
 259             warn_overflow(ERR_PASS2, size);
 260     }
 261 }
 262
 263 /*
 264  * This routine wrappers the real output format's output routine,
 265  * in order to pass a copy of the data off to the listing file
 266  * generator at the same time.
 267  */
 268 static void out(int64_t offset, int32_t segto, const void *data,
 269                 enum out_type type, uint64_t size,
 270                 int32_t segment, int32_t wrt)
 271 {
 272     static int32_t lineno = 0;     /* static!!! */
 273     static char *lnfname = NULL;
 274     uint8_t p[8];
 275
 276     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 277         /*
 278          * This is a non-relocated address, and we're going to
 279          * convert it into RAWDATA format.
 280          */
 281         uint8_t *q = p;
 282
 283         if (size > 8) {
 284             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 285             return;
 286         }
 287
 288         WRITEADDR(q, *(int64_t *)data, size);
 289         data = p;
 290         type = OUT_RAWDATA;
 291     }
 292
 293     list->output(offset, data, type, size);
 294
 295     /*
 296      * this call to src_get determines when we call the
 297      * debug-format-specific "linenum" function
 298      * it updates lineno and lnfname to the current values
 299      * returning 0 if "same as last time", -2 if lnfname
 300      * changed, and the amount by which lineno changed,
 301      * if it did. thus, these variables must be static
 302      */
 303
 304     if (src_get(&lineno, &lnfname)) {
 305         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 306     }
 307
 308     outfmt->output(segto, data, type, size, segment, wrt);
 309 }
 310
 311 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 312                      insn * ins, const uint8_t *code)
 313 {
 314     int64_t isize;
 315     uint8_t c = code[0];
 316
 317     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 318         return false;
 319     if (!optimizing)
 320         return false;
 321     if (optimizing < 0 && c == 0371)
 322         return false;
 323
 324     isize = calcsize(segment, offset, bits, ins, code);
 325
 326     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 327         /* Be optimistic in pass 1 */
 328         return true;
 329
 330     if (ins->oprs[0].segment != segment)
 331         return false;
 332
 333     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 334     return (isize >= -128 && isize <= 127); /* is it byte size? */
 335 }
 336
 337 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 338                  insn * instruction, struct ofmt *output, efunc error,
 339                  ListGen * listgen)
 340 {
 341     const struct itemplate *temp;
 342     int j;
 343     enum match_result m;
 344     int64_t insn_end;
 345     int32_t itimes;
 346     int64_t start = offset;
 347     int64_t wsize;              /* size for DB etc. */
 348
 349     errfunc = error;            /* to pass to other functions */
 350     cpu = cp;
 351     outfmt = output;            /* likewise */
 352     list = listgen;             /* and again */
 353
 354     wsize = idata_bytes(instruction->opcode);
 355     if (wsize == -1)
 356         return 0;
 357
 358     if (wsize) {
 359         extop *e;
 360         int32_t t = instruction->times;
 361         if (t < 0)
 362             errfunc(ERR_PANIC,
 363                     "instruction->times < 0 (%ld) in assemble()", t);
 364
 365         while (t--) {           /* repeat TIMES times */
 366             list_for_each(e, instruction->eops) {
 367                 if (e->type == EOT_DB_NUMBER) {
 368                     if (wsize > 8) {
 369                         errfunc(ERR_NONFATAL,
 370                                 "integer supplied to a DT, DO or DY"
 371                                 " instruction");
 372                     } else {
 373                         out(offset, segment, &e->offset,
 374                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 375                         offset += wsize;
 376                     }
 377                 } else if (e->type == EOT_DB_STRING ||
 378                            e->type == EOT_DB_STRING_FREE) {
 379                     int align;
 380
 381                     out(offset, segment, e->stringval,
 382                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 383                     align = e->stringlen % wsize;
 384
 385                     if (align) {
 386                         align = wsize - align;
 387                         out(offset, segment, zero_buffer,
 388                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 389                     }
 390                     offset += e->stringlen + align;
 391                 }
 392             }
 393             if (t > 0 && t == instruction->times - 1) {
 394                 /*
 395                  * Dummy call to list->output to give the offset to the
 396                  * listing module.
 397                  */
 398                 list->output(offset, NULL, OUT_RAWDATA, 0);
 399                 list->uplevel(LIST_TIMES);
 400             }
 401         }
 402         if (instruction->times > 1)
 403             list->downlevel(LIST_TIMES);
 404         return offset - start;
 405     }
 406
 407     if (instruction->opcode == I_INCBIN) {
 408         const char *fname = instruction->eops->stringval;
 409         FILE *fp;
 410
 411         fp = fopen(fname, "rb");
 412         if (!fp) {
 413             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 414                   fname);
 415         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 416             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 417                   fname);
 418         } else {
 419             static char buf[4096];
 420             size_t t = instruction->times;
 421             size_t base = 0;
 422             size_t len;
 423
 424             len = ftell(fp);
 425             if (instruction->eops->next) {
 426                 base = instruction->eops->next->offset;
 427                 len -= base;
 428                 if (instruction->eops->next->next &&
 429                     len > (size_t)instruction->eops->next->next->offset)
 430                     len = (size_t)instruction->eops->next->next->offset;
 431             }
 432             /*
 433              * Dummy call to list->output to give the offset to the
 434              * listing module.
 435              */
 436             list->output(offset, NULL, OUT_RAWDATA, 0);
 437             list->uplevel(LIST_INCBIN);
 438             while (t--) {
 439                 size_t l;
 440
 441                 fseek(fp, base, SEEK_SET);
 442                 l = len;
 443                 while (l > 0) {
 444                     int32_t m;
 445                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 446                     if (!m) {
 447                         /*
 448                          * This shouldn't happen unless the file
 449                          * actually changes while we are reading
 450                          * it.
 451                          */
 452                         error(ERR_NONFATAL,
 453                               "`incbin': unexpected EOF while"
 454                               " reading file `%s'", fname);
 455                         t = 0;  /* Try to exit cleanly */
 456                         break;
 457                     }
 458                     out(offset, segment, buf, OUT_RAWDATA, m,
 459                         NO_SEG, NO_SEG);
 460                     l -= m;
 461                 }
 462             }
 463             list->downlevel(LIST_INCBIN);
 464             if (instruction->times > 1) {
 465                 /*
 466                  * Dummy call to list->output to give the offset to the
 467                  * listing module.
 468                  */
 469                 list->output(offset, NULL, OUT_RAWDATA, 0);
 470                 list->uplevel(LIST_TIMES);
 471                 list->downlevel(LIST_TIMES);
 472             }
 473             fclose(fp);
 474             return instruction->times * len;
 475         }
 476         return 0;               /* if we're here, there's an error */
 477     }
 478
 479     /* Check to see if we need an address-size prefix */
 480     add_asp(instruction, bits);
 481
 482     m = find_match(&temp, instruction, segment, offset, bits);
 483
 484     if (m == MOK_GOOD) {
 485         /* Matches! */
 486         int64_t insn_size = calcsize(segment, offset, bits,
 487                                      instruction, temp->code);
 488         itimes = instruction->times;
 489         if (insn_size < 0)  /* shouldn't be, on pass two */
 490             error(ERR_PANIC, "errors made it through from pass one");
 491         else
 492             while (itimes--) {
 493                 for (j = 0; j < MAXPREFIX; j++) {
 494                     uint8_t c = 0;
 495                     switch (instruction->prefixes[j]) {
 496                     case P_WAIT:
 497                         c = 0x9B;
 498                         break;
 499                     case P_LOCK:
 500                         c = 0xF0;
 501                         break;
 502                     case P_REPNE:
 503                     case P_REPNZ:
 504                         c = 0xF2;
 505                         break;
 506                     case P_REPE:
 507                     case P_REPZ:
 508                     case P_REP:
 509                         c = 0xF3;
 510                         break;
 511                     case R_CS:
 512                         if (bits == 64) {
 513                             error(ERR_WARNING | ERR_PASS2,
 514                                   "cs segment base generated, but will be ignored in 64-bit mode");
 515                         }
 516                         c = 0x2E;
 517                         break;
 518                     case R_DS:
 519                         if (bits == 64) {
 520                             error(ERR_WARNING | ERR_PASS2,
 521                                   "ds segment base generated, but will be ignored in 64-bit mode");
 522                         }
 523                         c = 0x3E;
 524                         break;
 525                     case R_ES:
 526                         if (bits == 64) {
 527                             error(ERR_WARNING | ERR_PASS2,
 528                                   "es segment base generated, but will be ignored in 64-bit mode");
 529                         }
 530                         c = 0x26;
 531                         break;
 532                     case R_FS:
 533                         c = 0x64;
 534                         break;
 535                     case R_GS:
 536                         c = 0x65;
 537                         break;
 538                     case R_SS:
 539                         if (bits == 64) {
 540                             error(ERR_WARNING | ERR_PASS2,
 541                                   "ss segment base generated, but will be ignored in 64-bit mode");
 542                         }
 543                         c = 0x36;
 544                         break;
 545                     case R_SEGR6:
 546                     case R_SEGR7:
 547                         error(ERR_NONFATAL,
 548                               "segr6 and segr7 cannot be used as prefixes");
 549                         break;
 550                     case P_A16:
 551                         if (bits == 64) {
 552                             error(ERR_NONFATAL,
 553                                   "16-bit addressing is not supported "
 554                                   "in 64-bit mode");
 555                         } else if (bits != 16)
 556                             c = 0x67;
 557                         break;
 558                     case P_A32:
 559                         if (bits != 32)
 560                             c = 0x67;
 561                         break;
 562                     case P_A64:
 563                         if (bits != 64) {
 564                             error(ERR_NONFATAL,
 565                                   "64-bit addressing is only supported "
 566                                   "in 64-bit mode");
 567                         }
 568                         break;
 569                     case P_ASP:
 570                         c = 0x67;
 571                         break;
 572                     case P_O16:
 573                         if (bits != 16)
 574                             c = 0x66;
 575                         break;
 576                     case P_O32:
 577                         if (bits == 16)
 578                             c = 0x66;
 579                         break;
 580                     case P_O64:
 581                         /* REX.W */
 582                         break;
 583                     case P_OSP:
 584                         c = 0x66;
 585                         break;
 586                     case P_none:
 587                         break;
 588                     default:
 589                         error(ERR_PANIC, "invalid instruction prefix");
 590                     }
 591                     if (c != 0) {
 592                         out(offset, segment, &c, OUT_RAWDATA, 1,
 593                             NO_SEG, NO_SEG);
 594                         offset++;
 595                     }
 596                 }
 597                 insn_end = offset + insn_size;
 598                 gencode(segment, offset, bits, instruction,
 599                         temp, insn_end);
 600                 offset += insn_size;
 601                 if (itimes > 0 && itimes == instruction->times - 1) {
 602                     /*
 603                      * Dummy call to list->output to give the offset to the
 604                      * listing module.
 605                      */
 606                     list->output(offset, NULL, OUT_RAWDATA, 0);
 607                     list->uplevel(LIST_TIMES);
 608                 }
 609             }
 610         if (instruction->times > 1)
 611             list->downlevel(LIST_TIMES);
 612         return offset - start;
 613     } else {
 614         /* No match */
 615         switch (m) {
 616         case MERR_OPSIZEMISSING:
 617             error(ERR_NONFATAL, "operation size not specified");
 618             break;
 619         case MERR_OPSIZEMISMATCH:
 620             error(ERR_NONFATAL, "mismatch in operand sizes");
 621             break;
 622         case MERR_BADCPU:
 623             error(ERR_NONFATAL, "no instruction for this cpu level");
 624             break;
 625         case MERR_BADMODE:
 626             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 627                   bits);
 628             break;
 629         default:
 630             error(ERR_NONFATAL,
 631                   "invalid combination of opcode and operands");
 632             break;
 633         }
 634     }
 635     return 0;
 636 }
 637
 638 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 639                   insn * instruction, efunc error)
 640 {
 641     const struct itemplate *temp;
 642     enum match_result m;
 643
 644     errfunc = error;            /* to pass to other functions */
 645     cpu = cp;
 646
 647     if (instruction->opcode == I_none)
 648         return 0;
 649
 650     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 651         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 652         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 653         instruction->opcode == I_DY) {
 654         extop *e;
 655         int32_t isize, osize, wsize;
 656
 657         isize = 0;
 658         wsize = idata_bytes(instruction->opcode);
 659
 660         list_for_each(e, instruction->eops) {
 661             int32_t align;
 662
 663             osize = 0;
 664             if (e->type == EOT_DB_NUMBER) {
 665                 osize = 1;
 666                 warn_overflow_const(e->offset, wsize);
 667             } else if (e->type == EOT_DB_STRING ||
 668                        e->type == EOT_DB_STRING_FREE)
 669                 osize = e->stringlen;
 670
 671             align = (-osize) % wsize;
 672             if (align < 0)
 673                 align += wsize;
 674             isize += osize + align;
 675         }
 676         return isize * instruction->times;
 677     }
 678
 679     if (instruction->opcode == I_INCBIN) {
 680         const char *fname = instruction->eops->stringval;
 681         FILE *fp;
 682         int64_t val = 0;
 683         size_t len;
 684
 685         fp = fopen(fname, "rb");
 686         if (!fp)
 687             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 688                   fname);
 689         else if (fseek(fp, 0L, SEEK_END) < 0)
 690             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 691                   fname);
 692         else {
 693             len = ftell(fp);
 694             if (instruction->eops->next) {
 695                 len -= instruction->eops->next->offset;
 696                 if (instruction->eops->next->next &&
 697                     len > (size_t)instruction->eops->next->next->offset) {
 698                     len = (size_t)instruction->eops->next->next->offset;
 699                 }
 700             }
 701             val = instruction->times * len;
 702         }
 703         if (fp)
 704             fclose(fp);
 705         return val;
 706     }
 707
 708     /* Check to see if we need an address-size prefix */
 709     add_asp(instruction, bits);
 710
 711     m = find_match(&temp, instruction, segment, offset, bits);
 712     if (m == MOK_GOOD) {
 713         /* we've matched an instruction. */
 714         int64_t isize;
 715         const uint8_t *codes = temp->code;
 716         int j;
 717
 718         isize = calcsize(segment, offset, bits, instruction, codes);
 719         if (isize < 0)
 720             return -1;
 721         for (j = 0; j < MAXPREFIX; j++) {
 722             switch (instruction->prefixes[j]) {
 723             case P_A16:
 724                 if (bits != 16)
 725                     isize++;
 726                 break;
 727             case P_A32:
 728                 if (bits != 32)
 729                     isize++;
 730                 break;
 731             case P_O16:
 732                 if (bits != 16)
 733                     isize++;
 734                 break;
 735             case P_O32:
 736                 if (bits == 16)
 737                     isize++;
 738                 break;
 739             case P_A64:
 740             case P_O64:
 741             case P_none:
 742                 break;
 743             default:
 744                 isize++;
 745                 break;
 746             }
 747         }
 748         return isize * instruction->times;
 749     } else {
 750         return -1;                  /* didn't match any instruction */
 751     }
 752 }
 753
 754 static bool possible_sbyte(operand *o, int min_optimizing)
 755 {
 756     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 757         !(o->opflags & OPFLAG_UNKNOWN) &&
 758         optimizing >= min_optimizing && !(o->type & STRICT);
 759 }
 760
 761 /* check that opn[op]  is a signed byte of size 16 or 32 */
 762 static bool is_sbyte16(operand *o, int min_optimizing)
 763 {
 764     int16_t v;
 765
 766     if (!possible_sbyte(o, min_optimizing))
 767         return false;
 768
 769     v = o->offset;
 770     return v >= -128 && v <= 127;
 771 }
 772
 773 static bool is_sbyte32(operand *o, int min_optimizing)
 774 {
 775     int32_t v;
 776
 777     if (!possible_sbyte(o, min_optimizing))
 778         return false;
 779
 780     v = o->offset;
 781     return v >= -128 && v <= 127;
 782 }
 783
 784 /* Check if o is zero of size 16 or 32 */
 785 static bool is_zero16(operand *o, int min_optimizing)
 786 {
 787     int16_t v;
 788
 789     if (!possible_sbyte(o, min_optimizing))
 790         return false;
 791
 792     v = o->offset;
 793     return v == 0;
 794 }
 795
 796 static bool is_zero32(operand *o, int min_optimizing)
 797 {
 798     int32_t v;
 799
 800     if (!possible_sbyte(o, min_optimizing))
 801         return false;
 802
 803     v = o->offset;
 804     return v == 0;
 805 }
 806
 807 /* Common construct */
 808 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 809
 810 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 811                         insn * ins, const uint8_t *codes)
 812 {
 813     int64_t length = 0;
 814     uint8_t c;
 815     int rex_mask = ~0;
 816     int op1, op2;
 817     struct operand *opx;
 818     uint8_t opex = 0;
 819
 820     ins->rex = 0;               /* Ensure REX is reset */
 821
 822     if (ins->prefixes[PPS_OSIZE] == P_O64)
 823         ins->rex |= REX_W;
 824
 825     (void)segment;              /* Don't warn that this parameter is unused */
 826     (void)offset;               /* Don't warn that this parameter is unused */
 827
 828     while (*codes) {
 829         c = *codes++;
 830         op1 = (c & 3) + ((opex & 1) << 2);
 831         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 832         opx = &ins->oprs[op1];
 833         opex = 0;               /* For the next iteration */
 834
 835         switch (c) {
 836         case 01:
 837         case 02:
 838         case 03:
 839         case 04:
 840             codes += c, length += c;
 841             break;
 842
 843         case 05:
 844         case 06:
 845         case 07:
 846             opex = c;
 847             break;
 848
 849         case4(010):
 850             ins->rex |=
 851                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 852             codes++, length++;
 853             break;
 854
 855         case4(014):
 856         case4(020):
 857         case4(024):
 858             length++;
 859             break;
 860
 861         case4(030):
 862             length += 2;
 863             break;
 864
 865         case4(034):
 866             if (opx->type & (BITS16 | BITS32 | BITS64))
 867                 length += (opx->type & BITS16) ? 2 : 4;
 868             else
 869                 length += (bits == 16) ? 2 : 4;
 870             break;
 871
 872         case4(040):
 873             length += 4;
 874             break;
 875
 876         case4(044):
 877             length += ins->addr_size >> 3;
 878             break;
 879
 880         case4(050):
 881             length++;
 882             break;
 883
 884         case4(054):
 885             length += 8; /* MOV reg64/imm */
 886             break;
 887
 888         case4(060):
 889             length += 2;
 890             break;
 891
 892         case4(064):
 893             if (opx->type & (BITS16 | BITS32 | BITS64))
 894                 length += (opx->type & BITS16) ? 2 : 4;
 895             else
 896                 length += (bits == 16) ? 2 : 4;
 897             break;
 898
 899         case4(070):
 900             length += 4;
 901             break;
 902
 903         case4(074):
 904             length += 2;
 905             break;
 906
 907         case4(0140):
 908             length += is_sbyte16(opx, 0) ? 1 : 2;
 909             break;
 910
 911         case4(0144):
 912             codes++;
 913             length++;
 914             break;
 915
 916         case4(0150):
 917             length += is_sbyte32(opx, 0) ? 1 : 4;
 918             break;
 919
 920         case4(0154):
 921             codes++;
 922             length++;
 923             break;
 924
 925         case4(0160):
 926             length++;
 927             ins->rex |= REX_D;
 928             ins->drexdst = regval(opx);
 929             break;
 930
 931         case4(0164):
 932             length++;
 933             ins->rex |= REX_D|REX_OC;
 934             ins->drexdst = regval(opx);
 935             break;
 936
 937         case 0171:
 938             break;
 939
 940         case 0172:
 941         case 0173:
 942         case 0174:
 943             codes++;
 944             length++;
 945             break;
 946
 947         case4(0250):
 948             length += is_sbyte32(opx, 0) ? 1 : 4;
 949             break;
 950
 951         case4(0254):
 952             length += 4;
 953             break;
 954
 955         case4(0260):
 956             ins->rex |= REX_V;
 957             ins->drexdst = regval(opx);
 958             ins->vex_cm = *codes++;
 959             ins->vex_wlp = *codes++;
 960             break;
 961
 962         case 0270:
 963             ins->rex |= REX_V;
 964             ins->drexdst = 0;
 965             ins->vex_cm = *codes++;
 966             ins->vex_wlp = *codes++;
 967             break;
 968
 969         case4(0274):
 970             length++;
 971             break;
 972
 973         case4(0300):
 974             break;
 975
 976         case 0310:
 977             if (bits == 64)
 978                 return -1;
 979             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 980             break;
 981
 982         case 0311:
 983             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 984             break;
 985
 986         case 0312:
 987             break;
 988
 989         case 0313:
 990             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 991                 has_prefix(ins, PPS_ASIZE, P_A32))
 992                 return -1;
 993             break;
 994
 995         case4(0314):
 996             break;
 997
 998         case 0320:
 999             length += (bits != 16);
1000             break;
1001
1002         case 0321:
1003             length += (bits == 16);
1004             break;
1005
1006         case 0322:
1007             break;
1008
1009         case 0323:
1010             rex_mask &= ~REX_W;
1011             break;
1012
1013         case 0324:
1014             ins->rex |= REX_W;
1015             break;
1016
1017         case 0325:
1018             ins->rex |= REX_NH;
1019             break;
1020
1021         case 0330:
1022             codes++, length++;
1023             break;
1024
1025         case 0331:
1026             break;
1027
1028         case 0332:
1029         case 0333:
1030             length++;
1031             break;
1032
1033         case 0334:
1034             ins->rex |= REX_L;
1035             break;
1036
1037         case 0335:
1038             break;
1039
1040         case 0336:
1041             if (!ins->prefixes[PPS_LREP])
1042                 ins->prefixes[PPS_LREP] = P_REP;
1043             break;
1044
1045         case 0337:
1046             if (!ins->prefixes[PPS_LREP])
1047                 ins->prefixes[PPS_LREP] = P_REPNE;
1048             break;
1049
1050         case 0340:
1051             if (ins->oprs[0].segment != NO_SEG)
1052                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1053                         " quantity of BSS space");
1054             else
1055                 length += ins->oprs[0].offset;
1056             break;
1057
1058         case 0341:
1059             if (!ins->prefixes[PPS_WAIT])
1060                 ins->prefixes[PPS_WAIT] = P_WAIT;
1061             break;
1062
1063         case4(0344):
1064             length++;
1065             break;
1066
1067         case 0360:
1068             break;
1069
1070         case 0361:
1071         case 0362:
1072         case 0363:
1073             length++;
1074             break;
1075
1076         case 0364:
1077         case 0365:
1078             break;
1079
1080         case 0366:
1081         case 0367:
1082             length++;
1083             break;
1084
1085         case 0370:
1086         case 0371:
1087         case 0372:
1088             break;
1089
1090         case 0373:
1091             length++;
1092             break;
1093
1094         case4(0100):
1095         case4(0110):
1096         case4(0120):
1097         case4(0130):
1098         case4(0200):
1099         case4(0204):
1100         case4(0210):
1101         case4(0214):
1102         case4(0220):
1103         case4(0224):
1104         case4(0230):
1105         case4(0234):
1106             {
1107                 ea ea_data;
1108                 int rfield;
1109                 opflags_t rflags;
1110                 struct operand *opy = &ins->oprs[op2];
1111
1112                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1113
1114                 if (c <= 0177) {
1115                     /* pick rfield from operand b (opx) */
1116                     rflags = regflag(opx);
1117                     rfield = nasm_regvals[opx->basereg];
1118                 } else {
1119                     rflags = 0;
1120                     rfield = c & 7;
1121                 }
1122                 if (!process_ea(opy, &ea_data, bits,
1123                                 ins->addr_size, rfield, rflags)) {
1124                     errfunc(ERR_NONFATAL, "invalid effective address");
1125                     return -1;
1126                 } else {
1127                     ins->rex |= ea_data.rex;
1128                     length += ea_data.size;
1129                 }
1130             }
1131             break;
1132
1133         default:
1134             errfunc(ERR_PANIC, "internal instruction table corrupt"
1135                     ": instruction code \\%o (0x%02X) given", c, c);
1136             break;
1137         }
1138     }
1139
1140     ins->rex &= rex_mask;
1141
1142     if (ins->rex & REX_NH) {
1143         if (ins->rex & REX_H) {
1144             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1145             return -1;
1146         }
1147         ins->rex &= ~REX_P;     /* Don't force REX prefix due to high reg */
1148     }
1149
1150     if (ins->rex & REX_V) {
1151         int bad32 = REX_R|REX_W|REX_X|REX_B;
1152
1153         if (ins->rex & REX_H) {
1154             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1155             return -1;
1156         }
1157         switch (ins->vex_wlp & 030) {
1158         case 000:
1159         case 020:
1160             ins->rex &= ~REX_W;
1161             break;
1162         case 010:
1163             ins->rex |= REX_W;
1164             bad32 &= ~REX_W;
1165             break;
1166         case 030:
1167             /* Follow REX_W */
1168             break;
1169         }
1170
1171         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1172             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1173             return -1;
1174         }
1175         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1176             length += 3;
1177         else
1178             length += 2;
1179     } else if (ins->rex & REX_D) {
1180         if (ins->rex & REX_H) {
1181             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1182             return -1;
1183         }
1184         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1185                            ins->drexdst > 7)) {
1186             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1187             return -1;
1188         }
1189         length++;
1190     } else if (ins->rex & REX_REAL) {
1191         if (ins->rex & REX_H) {
1192             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1193             return -1;
1194         } else if (bits == 64) {
1195             length++;
1196         } else if ((ins->rex & REX_L) &&
1197                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1198                    cpu >= IF_X86_64) {
1199             /* LOCK-as-REX.R */
1200             assert_no_prefix(ins, PPS_LREP);
1201             length++;
1202         } else {
1203             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1204             return -1;
1205         }
1206     }
1207
1208     return length;
1209 }
1210
/*
 * Emit a REX prefix byte if one is due.
 *
 * Expands inside a scope that provides `ins`, `bits`, `offset` and
 * `segment`.  A byte is written only when bits is 64, ins->rex has at
 * least one REX_REAL bit set, and neither REX_D nor REX_V is set.  The
 * byte written is the REX_REAL bits of ins->rex combined with REX_P;
 * ins->rex is then cleared and offset advanced by one.
 */
#define EMIT_REX()                                                      \
    if ((bits == 64) && (ins->rex & REX_REAL) &&                        \
        !(ins->rex & (REX_D|REX_V))) {                                  \
        ins->rex = REX_P | (ins->rex & REX_REAL);                       \
        out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
        ins->rex = 0;                                                   \
        offset++;                                                       \
    }
1218
1219 static void gencode(int32_t segment, int64_t offset, int bits,
1220                     insn * ins, const struct itemplate *temp,
1221                     int64_t insn_end)
1222 {
1223     static char condval[] = {   /* conditional opcodes */
1224         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1225         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1226         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1227     };
1228     uint8_t c;
1229     uint8_t bytes[4];
1230     int64_t size;
1231     int64_t data;
1232     int op1, op2;
1233     struct operand *opx;
1234     const uint8_t *codes = temp->code;
1235     uint8_t opex = 0;
1236
1237     while (*codes) {
1238         c = *codes++;
1239         op1 = (c & 3) + ((opex & 1) << 2);
1240         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1241         opx = &ins->oprs[op1];
1242         opex = 0;               /* For the next iteration */
1243
1244         switch (c) {
1245         case 01:
1246         case 02:
1247         case 03:
1248         case 04:
1249             EMIT_REX();
1250             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1251             codes += c;
1252             offset += c;
1253             break;
1254
1255         case 05:
1256         case 06:
1257         case 07:
1258             opex = c;
1259             break;
1260
1261         case4(010):
1262             EMIT_REX();
1263             bytes[0] = *codes++ + (regval(opx) & 7);
1264             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1265             offset += 1;
1266             break;
1267
1268         case4(014):
1269             /* The test for BITS8 and SBYTE here is intended to avoid
1270                warning on optimizer actions due to SBYTE, while still
1271                warn on explicit BYTE directives.  Also warn, obviously,
1272                if the optimizer isn't enabled. */
1273             if (((opx->type & BITS8) ||
1274                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1275                 (opx->offset < -128 || opx->offset > 127)) {
1276                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1277                         "signed byte value exceeds bounds");
1278             }
1279             if (opx->segment != NO_SEG) {
1280                 data = opx->offset;
1281                 out(offset, segment, &data, OUT_ADDRESS, 1,
1282                     opx->segment, opx->wrt);
1283             } else {
1284                 bytes[0] = opx->offset;
1285                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1286                     NO_SEG);
1287             }
1288             offset += 1;
1289             break;
1290
1291         case4(020):
1292             if (opx->offset < -256 || opx->offset > 255) {
1293                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1294                         "byte value exceeds bounds");
1295             }
1296             if (opx->segment != NO_SEG) {
1297                 data = opx->offset;
1298                 out(offset, segment, &data, OUT_ADDRESS, 1,
1299                     opx->segment, opx->wrt);
1300             } else {
1301                 bytes[0] = opx->offset;
1302                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1303                     NO_SEG);
1304             }
1305             offset += 1;
1306             break;
1307
1308         case4(024):
1309             if (opx->offset < 0 || opx->offset > 255)
1310                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1311                         "unsigned byte value exceeds bounds");
1312             if (opx->segment != NO_SEG) {
1313                 data = opx->offset;
1314                 out(offset, segment, &data, OUT_ADDRESS, 1,
1315                     opx->segment, opx->wrt);
1316             } else {
1317                 bytes[0] = opx->offset;
1318                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1319                     NO_SEG);
1320             }
1321             offset += 1;
1322             break;
1323
1324         case4(030):
1325             warn_overflow_opd(opx, 2);
1326             data = opx->offset;
1327             out(offset, segment, &data, OUT_ADDRESS, 2,
1328                 opx->segment, opx->wrt);
1329             offset += 2;
1330             break;
1331
1332         case4(034):
1333             if (opx->type & (BITS16 | BITS32))
1334                 size = (opx->type & BITS16) ? 2 : 4;
1335             else
1336                 size = (bits == 16) ? 2 : 4;
1337             warn_overflow_opd(opx, size);
1338             data = opx->offset;
1339             out(offset, segment, &data, OUT_ADDRESS, size,
1340                 opx->segment, opx->wrt);
1341             offset += size;
1342             break;
1343
1344         case4(040):
1345             warn_overflow_opd(opx, 4);
1346             data = opx->offset;
1347             out(offset, segment, &data, OUT_ADDRESS, 4,
1348                 opx->segment, opx->wrt);
1349             offset += 4;
1350             break;
1351
1352         case4(044):
1353             data = opx->offset;
1354             size = ins->addr_size >> 3;
1355             warn_overflow_opd(opx, size);
1356             out(offset, segment, &data, OUT_ADDRESS, size,
1357                 opx->segment, opx->wrt);
1358             offset += size;
1359             break;
1360
1361         case4(050):
1362             if (opx->segment != segment) {
1363                 data = opx->offset;
1364                 out(offset, segment, &data,
1365                     OUT_REL1ADR, insn_end - offset,
1366                     opx->segment, opx->wrt);
1367             } else {
1368                 data = opx->offset - insn_end;
1369                 if (data > 127 || data < -128)
1370                     errfunc(ERR_NONFATAL, "short jump is out of range");
1371                 out(offset, segment, &data,
1372                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1373             }
1374             offset += 1;
1375             break;
1376
1377         case4(054):
1378             data = (int64_t)opx->offset;
1379             out(offset, segment, &data, OUT_ADDRESS, 8,
1380                 opx->segment, opx->wrt);
1381             offset += 8;
1382             break;
1383
1384         case4(060):
1385             if (opx->segment != segment) {
1386                 data = opx->offset;
1387                 out(offset, segment, &data,
1388                     OUT_REL2ADR, insn_end - offset,
1389                     opx->segment, opx->wrt);
1390             } else {
1391                 data = opx->offset - insn_end;
1392                 out(offset, segment, &data,
1393                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1394             }
1395             offset += 2;
1396             break;
1397
1398         case4(064):
1399             if (opx->type & (BITS16 | BITS32 | BITS64))
1400                 size = (opx->type & BITS16) ? 2 : 4;
1401             else
1402                 size = (bits == 16) ? 2 : 4;
1403             if (opx->segment != segment) {
1404                 data = opx->offset;
1405                 out(offset, segment, &data,
1406                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1407                     insn_end - offset, opx->segment, opx->wrt);
1408             } else {
1409                 data = opx->offset - insn_end;
1410                 out(offset, segment, &data,
1411                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1412             }
1413             offset += size;
1414             break;
1415
1416         case4(070):
1417             if (opx->segment != segment) {
1418                 data = opx->offset;
1419                 out(offset, segment, &data,
1420                     OUT_REL4ADR, insn_end - offset,
1421                     opx->segment, opx->wrt);
1422             } else {
1423                 data = opx->offset - insn_end;
1424                 out(offset, segment, &data,
1425                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1426             }
1427             offset += 4;
1428             break;
1429
1430         case4(074):
1431             if (opx->segment == NO_SEG)
1432                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1433                         " relocatable");
1434             data = 0;
1435             out(offset, segment, &data, OUT_ADDRESS, 2,
1436                 outfmt->segbase(1 + opx->segment),
1437                 opx->wrt);
1438             offset += 2;
1439             break;
1440
1441         case4(0140):
1442             data = opx->offset;
1443             warn_overflow_opd(opx, 2);
1444             if (is_sbyte16(opx, 0)) {
1445                 bytes[0] = data;
1446                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1447                     NO_SEG);
1448                 offset++;
1449             } else {
1450                 out(offset, segment, &data, OUT_ADDRESS, 2,
1451                     opx->segment, opx->wrt);
1452                 offset += 2;
1453             }
1454             break;
1455
1456         case4(0144):
1457             EMIT_REX();
1458             bytes[0] = *codes++;
1459             if (is_sbyte16(opx, 0))
1460                 bytes[0] |= 2;  /* s-bit */
1461             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1462             offset++;
1463             break;
1464
1465         case4(0150):
1466             data = opx->offset;
1467             warn_overflow_opd(opx, 4);
1468             if (is_sbyte32(opx, 0)) {
1469                 bytes[0] = data;
1470                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1471                     NO_SEG);
1472                 offset++;
1473             } else {
1474                 out(offset, segment, &data, OUT_ADDRESS, 4,
1475                     opx->segment, opx->wrt);
1476                 offset += 4;
1477             }
1478             break;
1479
1480         case4(0154):
1481             EMIT_REX();
1482             bytes[0] = *codes++;
1483             if (is_sbyte32(opx, 0))
1484                 bytes[0] |= 2;  /* s-bit */
1485             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1486             offset++;
1487             break;
1488
1489         case4(0160):
1490         case4(0164):
1491             break;
1492
1493         case 0171:
1494             bytes[0] =
1495                 (ins->drexdst << 4) |
1496                 (ins->rex & REX_OC ? 0x08 : 0) |
1497                 (ins->rex & (REX_R|REX_X|REX_B));
1498             ins->rex = 0;
1499             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1500             offset++;
1501             break;
1502
1503         case 0172:
1504             c = *codes++;
1505             opx = &ins->oprs[c >> 3];
1506             bytes[0] = nasm_regvals[opx->basereg] << 4;
1507             opx = &ins->oprs[c & 7];
1508             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1509                 errfunc(ERR_NONFATAL,
1510                         "non-absolute expression not permitted as argument %d",
1511                         c & 7);
1512             } else {
1513                 if (opx->offset & ~15) {
1514                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1515                             "four-bit argument exceeds bounds");
1516                 }
1517                 bytes[0] |= opx->offset & 15;
1518             }
1519             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1520             offset++;
1521             break;
1522
1523         case 0173:
1524             c = *codes++;
1525             opx = &ins->oprs[c >> 4];
1526             bytes[0] = nasm_regvals[opx->basereg] << 4;
1527             bytes[0] |= c & 15;
1528             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1529             offset++;
1530             break;
1531
1532         case 0174:
1533             c = *codes++;
1534             opx = &ins->oprs[c];
1535             bytes[0] = nasm_regvals[opx->basereg] << 4;
1536             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1537             offset++;
1538             break;
1539
1540         case4(0250):
1541             data = opx->offset;
1542             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1543                 (int32_t)data != (int64_t)data) {
1544                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1545                         "signed dword immediate exceeds bounds");
1546             }
1547             if (is_sbyte32(opx, 0)) {
1548                 bytes[0] = data;
1549                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1550                     NO_SEG);
1551                 offset++;
1552             } else {
1553                 out(offset, segment, &data, OUT_ADDRESS, 4,
1554                     opx->segment, opx->wrt);
1555                 offset += 4;
1556             }
1557             break;
1558
1559         case4(0254):
1560             data = opx->offset;
1561             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1562                 (int32_t)data != (int64_t)data) {
1563                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1564                         "signed dword immediate exceeds bounds");
1565             }
1566             out(offset, segment, &data, OUT_ADDRESS, 4,
1567                 opx->segment, opx->wrt);
1568             offset += 4;
1569             break;
1570
1571         case4(0260):
1572         case 0270:
1573             codes += 2;
1574             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1575                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1576                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1577                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1578                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1579                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1580                 offset += 3;
1581             } else {
1582                 bytes[0] = 0xc5;
1583                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1584                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1585                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1586                 offset += 2;
1587             }
1588             break;
1589
1590         case4(0274):
1591         {
1592             uint64_t uv, um;
1593             int s;
1594
1595             if (ins->rex & REX_W)
1596                 s = 64;
1597             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1598                 s = 16;
1599             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1600                 s = 32;
1601             else
1602                 s = bits;
1603
1604             um = (uint64_t)2 << (s-1);
1605             uv = opx->offset;
1606
1607             if (uv > 127 && uv < (uint64_t)-128 &&
1608                 (uv < um-128 || uv > um-1)) {
1609                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1610                         "signed byte value exceeds bounds");
1611             }
1612             if (opx->segment != NO_SEG) {
1613                 data = uv;
1614                 out(offset, segment, &data, OUT_ADDRESS, 1,
1615                     opx->segment, opx->wrt);
1616             } else {
1617                 bytes[0] = uv;
1618                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1619                     NO_SEG);
1620             }
1621             offset += 1;
1622             break;
1623         }
1624
1625         case4(0300):
1626             break;
1627
1628         case 0310:
1629             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1630                 *bytes = 0x67;
1631                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1632                 offset += 1;
1633             } else
1634                 offset += 0;
1635             break;
1636
1637         case 0311:
1638             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1639                 *bytes = 0x67;
1640                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1641                 offset += 1;
1642             } else
1643                 offset += 0;
1644             break;
1645
1646         case 0312:
1647             break;
1648
1649         case 0313:
1650             ins->rex = 0;
1651             break;
1652
1653         case4(0314):
1654             break;
1655
1656         case 0320:
1657             if (bits != 16) {
1658                 *bytes = 0x66;
1659                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1660                 offset += 1;
1661             } else
1662                 offset += 0;
1663             break;
1664
1665         case 0321:
1666             if (bits == 16) {
1667                 *bytes = 0x66;
1668                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1669                 offset += 1;
1670             } else
1671                 offset += 0;
1672             break;
1673
1674         case 0322:
1675         case 0323:
1676             break;
1677
1678         case 0324:
1679             ins->rex |= REX_W;
1680             break;
1681
1682         case 0325:
1683             break;
1684
1685         case 0330:
1686             *bytes = *codes++ ^ condval[ins->condition];
1687             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1688             offset += 1;
1689             break;
1690
1691         case 0331:
1692             break;
1693
1694         case 0332:
1695         case 0333:
1696             *bytes = c - 0332 + 0xF2;
1697             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1698             offset += 1;
1699             break;
1700
1701         case 0334:
1702             if (ins->rex & REX_R) {
1703                 *bytes = 0xF0;
1704                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1705                 offset += 1;
1706             }
1707             ins->rex &= ~(REX_L|REX_R);
1708             break;
1709
1710         case 0335:
1711             break;
1712
1713         case 0336:
1714         case 0337:
1715             break;
1716
1717         case 0340:
1718             if (ins->oprs[0].segment != NO_SEG)
1719                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1720             else {
1721                 int64_t size = ins->oprs[0].offset;
1722                 if (size > 0)
1723                     out(offset, segment, NULL,
1724                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1725                 offset += size;
1726             }
1727             break;
1728
1729         case 0341:
1730             break;
1731
1732         case 0344:
1733         case 0345:
1734             bytes[0] = c & 1;
1735             switch (ins->oprs[0].basereg) {
1736             case R_CS:
1737                 bytes[0] += 0x0E;
1738                 break;
1739             case R_DS:
1740                 bytes[0] += 0x1E;
1741                 break;
1742             case R_ES:
1743                 bytes[0] += 0x06;
1744                 break;
1745             case R_SS:
1746                 bytes[0] += 0x16;
1747                 break;
1748             default:
1749                 errfunc(ERR_PANIC,
1750                         "bizarre 8086 segment register received");
1751             }
1752             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1753             offset++;
1754             break;
1755
1756         case 0346:
1757         case 0347:
1758             bytes[0] = c & 1;
1759             switch (ins->oprs[0].basereg) {
1760             case R_FS:
1761                 bytes[0] += 0xA0;
1762                 break;
1763             case R_GS:
1764                 bytes[0] += 0xA8;
1765                 break;
1766             default:
1767                 errfunc(ERR_PANIC,
1768                         "bizarre 386 segment register received");
1769             }
1770             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1771             offset++;
1772             break;
1773
1774         case 0360:
1775             break;
1776
1777         case 0361:
1778             bytes[0] = 0x66;
1779             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1780             offset += 1;
1781             break;
1782
1783         case 0362:
1784         case 0363:
1785             bytes[0] = c - 0362 + 0xf2;
1786             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1787             offset += 1;
1788             break;
1789
1790         case 0364:
1791         case 0365:
1792             break;
1793
1794         case 0366:
1795         case 0367:
1796             *bytes = c - 0366 + 0x66;
1797             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1798             offset += 1;
1799             break;
1800
1801         case 0370:
1802         case 0371:
1803         case 0372:
1804             break;
1805
1806         case 0373:
1807             *bytes = bits == 16 ? 3 : 5;
1808             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1809             offset += 1;
1810             break;
1811
1812         case4(0100):
1813         case4(0110):
1814         case4(0120):
1815         case4(0130):
1816         case4(0200):
1817         case4(0204):
1818         case4(0210):
1819         case4(0214):
1820         case4(0220):
1821         case4(0224):
1822         case4(0230):
1823         case4(0234):
1824             {
1825                 ea ea_data;
1826                 int rfield;
1827                 opflags_t rflags;
1828                 uint8_t *p;
1829                 int32_t s;
1830                 enum out_type type;
1831                 struct operand *opy = &ins->oprs[op2];
1832
1833                 if (c <= 0177) {
1834                     /* pick rfield from operand b (opx) */
1835                     rflags = regflag(opx);
1836                     rfield = nasm_regvals[opx->basereg];
1837                 } else {
1838                     /* rfield is constant */
1839                     rflags = 0;
1840                     rfield = c & 7;
1841                 }
1842
1843                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1844                                 rfield, rflags)) {
1845                     errfunc(ERR_NONFATAL, "invalid effective address");
1846                 }
1847
1848
1849                 p = bytes;
1850                 *p++ = ea_data.modrm;
1851                 if (ea_data.sib_present)
1852                     *p++ = ea_data.sib;
1853
1854                 /* DREX suffixes come between the SIB and the displacement */
1855                 if (ins->rex & REX_D) {
1856                     *p++ = (ins->drexdst << 4) |
1857                            (ins->rex & REX_OC ? 0x08 : 0) |
1858                            (ins->rex & (REX_R|REX_X|REX_B));
1859                     ins->rex = 0;
1860                 }
1861
1862                 s = p - bytes;
1863                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1864
1865                 /*
1866                  * Make sure the address gets the right offset in case
1867                  * the line breaks in the .lst file (BR 1197827)
1868                  */
1869                 offset += s;
1870                 s = 0;
1871
1872                 switch (ea_data.bytes) {
1873                 case 0:
1874                     break;
1875                 case 1:
1876                 case 2:
1877                 case 4:
1878                 case 8:
1879                     data = opy->offset;
1880                     s += ea_data.bytes;
1881                     if (ea_data.rip) {
1882                         if (opy->segment == segment) {
1883                             data -= insn_end;
1884                             if (overflow_signed(data, ea_data.bytes))
1885                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1886                             out(offset, segment, &data, OUT_ADDRESS,
1887                                 ea_data.bytes, NO_SEG, NO_SEG);
1888                         } else {
1889                             /* overflow check in output/linker? */
1890                             out(offset, segment, &data, OUT_REL4ADR,
1891                                 insn_end - offset, opy->segment, opy->wrt);
1892                         }
1893                     } else {
1894                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1895                             signed_bits(opy->offset, ins->addr_size) !=
1896                             signed_bits(opy->offset, ea_data.bytes * 8))
1897                             warn_overflow(ERR_PASS2, ea_data.bytes);
1898
1899                         type = OUT_ADDRESS;
1900                         out(offset, segment, &data, OUT_ADDRESS,
1901                             ea_data.bytes, opy->segment, opy->wrt);
1902                     }
1903                     break;
1904                 default:
1905                     /* Impossible! */
1906                     errfunc(ERR_PANIC,
1907                             "Invalid amount of bytes (%d) for offset?!",
1908                             ea_data.bytes);
1909                     break;
1910                 }
1911                 offset += s;
1912             }
1913             break;
1914
1915         default:
1916             errfunc(ERR_PANIC, "internal instruction table corrupt"
1917                     ": instruction code \\%o (0x%02X) given", c, c);
1918             break;
1919         }
1920     }
1921 }
1922
1923 static opflags_t regflag(const operand * o)
1924 {
1925     if (!is_register(o->basereg))
1926         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1927     return nasm_reg_flags[o->basereg];
1928 }
1929
1930 static int32_t regval(const operand * o)
1931 {
1932     if (!is_register(o->basereg))
1933         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1934     return nasm_regvals[o->basereg];
1935 }
1936
1937 static int op_rexflags(const operand * o, int mask)
1938 {
1939     opflags_t flags;
1940     int val;
1941
1942     if (!is_register(o->basereg))
1943         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1944
1945     flags = nasm_reg_flags[o->basereg];
1946     val = nasm_regvals[o->basereg];
1947
1948     return rexflags(val, flags, mask);
1949 }
1950
1951 static int rexflags(int val, opflags_t flags, int mask)
1952 {
1953     int rex = 0;
1954
1955     if (val >= 8)
1956         rex |= REX_B|REX_X|REX_R;
1957     if (flags & BITS64)
1958         rex |= REX_W;
1959     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1960         rex |= REX_H;
1961     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1962         rex |= REX_P;
1963
1964     return rex & mask;
1965 }
1966
1967 static enum match_result find_match(const struct itemplate **tempp,
1968                                     insn *instruction,
1969                                     int32_t segment, int64_t offset, int bits)
1970 {
1971     const struct itemplate *temp;
1972     enum match_result m, merr;
1973     opflags_t xsizeflags[MAX_OPERANDS];
1974     bool opsizemissing = false;
1975     int i;
1976
1977     for (i = 0; i < instruction->operands; i++)
1978         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1979
1980     merr = MERR_INVALOP;
1981
1982     for (temp = nasm_instructions[instruction->opcode];
1983          temp->opcode != I_none; temp++) {
1984         m = matches(temp, instruction, bits);
1985         if (m == MOK_JUMP) {
1986             if (jmp_match(segment, offset, bits, instruction, temp->code))
1987                 m = MOK_GOOD;
1988             else
1989                 m = MERR_INVALOP;
1990         } else if (m == MERR_OPSIZEMISSING &&
1991                    (temp->flags & IF_SMASK) != IF_SX) {
1992             /*
1993              * Missing operand size and a candidate for fuzzy matching...
1994              */
1995             for (i = 0; i < temp->operands; i++) {
1996                 if ((temp->opd[i] & SAME_AS) == 0)
1997                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1998             }
1999             opsizemissing = true;
2000         }
2001         if (m > merr)
2002             merr = m;
2003         if (merr == MOK_GOOD)
2004             goto done;
2005     }
2006
2007     /* No match, but see if we can get a fuzzy operand size match... */
2008     if (!opsizemissing)
2009         goto done;
2010
2011     for (i = 0; i < instruction->operands; i++) {
2012         /*
2013          * We ignore extrinsic operand sizes on registers, so we should
2014          * never try to fuzzy-match on them.  This also resolves the case
2015          * when we have e.g. "xmmrm128" in two different positions.
2016          */
2017         if (is_class(REGISTER, instruction->oprs[i].type))
2018             continue;
2019
2020         /* This tests if xsizeflags[i] has more than one bit set */
2021         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2022             goto done;          /* No luck */
2023
2024         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2025     }
2026
2027     /* Try matching again... */
2028     for (temp = nasm_instructions[instruction->opcode];
2029          temp->opcode != I_none; temp++) {
2030         m = matches(temp, instruction, bits);
2031         if (m == MOK_JUMP) {
2032             if (jmp_match(segment, offset, bits, instruction, temp->code))
2033                 m = MOK_GOOD;
2034             else
2035                 m = MERR_INVALOP;
2036         }
2037         if (m > merr)
2038             merr = m;
2039         if (merr == MOK_GOOD)
2040             goto done;
2041     }
2042
2043 done:
2044     *tempp = temp;
2045     return merr;
2046 }
2047
2048 static enum match_result matches(const struct itemplate *itemp,
2049                                  insn *instruction, int bits)
2050 {
2051     int i, size[MAX_OPERANDS], asize, oprs;
2052     bool opsizemissing = false;
2053
2054     /*
2055      * Check the opcode
2056      */
2057     if (itemp->opcode != instruction->opcode)
2058         return MERR_INVALOP;
2059
2060     /*
2061      * Count the operands
2062      */
2063     if (itemp->operands != instruction->operands)
2064         return MERR_INVALOP;
2065
2066     /*
2067      * Check that no spurious colons or TOs are present
2068      */
2069     for (i = 0; i < itemp->operands; i++)
2070         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2071             return MERR_INVALOP;
2072
2073     /*
2074      * Process size flags
2075      */
2076     switch (itemp->flags & IF_SMASK) {
2077     case IF_SB:
2078         asize = BITS8;
2079         break;
2080     case IF_SW:
2081         asize = BITS16;
2082         break;
2083     case IF_SD:
2084         asize = BITS32;
2085         break;
2086     case IF_SQ:
2087         asize = BITS64;
2088         break;
2089     case IF_SO:
2090         asize = BITS128;
2091         break;
2092     case IF_SY:
2093         asize = BITS256;
2094         break;
2095     case IF_SZ:
2096         switch (bits) {
2097         case 16:
2098             asize = BITS16;
2099             break;
2100         case 32:
2101             asize = BITS32;
2102             break;
2103         case 64:
2104             asize = BITS64;
2105             break;
2106         default:
2107             asize = 0;
2108             break;
2109         }
2110         break;
2111     default:
2112         asize = 0;
2113         break;
2114     }
2115
2116     if (itemp->flags & IF_ARMASK) {
2117         /* S- flags only apply to a specific operand */
2118         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2119         memset(size, 0, sizeof size);
2120         size[i] = asize;
2121     } else {
2122         /* S- flags apply to all operands */
2123         for (i = 0; i < MAX_OPERANDS; i++)
2124             size[i] = asize;
2125     }
2126
2127     /*
2128      * Check that the operand flags all match up,
2129      * it's a bit tricky so lets be verbose:
2130      *
2131      * 1) Find out the size of operand. If instruction
2132      *    doesn't have one specified -- we're trying to
2133      *    guess it either from template (IF_S* flag) or
2134      *    from code bits.
2135      *
2136      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2137      *    (ie the same operand as was specified somewhere in template, and
2138      *    this referred operand index is being achieved via ~SAME_AS)
2139      *    we are to be sure that both registers (in template and instruction)
2140      *    do exactly match.
2141      *
2142      * 3) If template operand do not match the instruction OR
2143      *    template has an operand size specified AND this size differ
2144      *    from which instruction has (perhaps we got it from code bits)
2145      *    we are:
2146      *      a)  Check that only size of instruction and operand is differ
2147      *          other characteristics do match
2148      *      b)  Perhaps it's a register specified in instruction so
2149      *          for such a case we just mark that operand as "size
2150      *          missing" and this will turn on fuzzy operand size
2151      *          logic facility (handled by a caller)
2152      */
2153     for (i = 0; i < itemp->operands; i++) {
2154         opflags_t type = instruction->oprs[i].type;
2155         if (!(type & SIZE_MASK))
2156             type |= size[i];
2157
2158         if (itemp->opd[i] & SAME_AS) {
2159             int j = itemp->opd[i] & ~SAME_AS;
2160             if (type != instruction->oprs[j].type ||
2161                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2162                 return MERR_INVALOP;
2163         } else if (itemp->opd[i] & ~type ||
2164             ((itemp->opd[i] & SIZE_MASK) &&
2165              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2166             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2167                 return MERR_INVALOP;
2168             } else if (!is_class(REGISTER, type)) {
2169                 /*
2170                  * Note: we don't honor extrinsic operand sizes for registers,
2171                  * so "missing operand size" for a register should be
2172                  * considered a wildcard match rather than an error.
2173                  */
2174                 opsizemissing = true;
2175             }
2176         }
2177     }
2178
2179     if (opsizemissing)
2180         return MERR_OPSIZEMISSING;
2181
2182     /*
2183      * Check operand sizes
2184      */
2185     if (itemp->flags & (IF_SM | IF_SM2)) {
2186         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2187         for (i = 0; i < oprs; i++) {
2188             asize = itemp->opd[i] & SIZE_MASK;
2189             if (asize) {
2190                 for (i = 0; i < oprs; i++)
2191                     size[i] = asize;
2192                 break;
2193             }
2194         }
2195     } else {
2196         oprs = itemp->operands;
2197     }
2198
2199     for (i = 0; i < itemp->operands; i++) {
2200         if (!(itemp->opd[i] & SIZE_MASK) &&
2201             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2202             return MERR_OPSIZEMISMATCH;
2203     }
2204
2205     /*
2206      * Check template is okay at the set cpu level
2207      */
2208     if (((itemp->flags & IF_PLEVEL) > cpu))
2209         return MERR_BADCPU;
2210
2211     /*
2212      * Verify the appropriate long mode flag.
2213      */
2214     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2215         return MERR_BADMODE;
2216
2217     /*
2218      * Check if special handling needed for Jumps
2219      */
2220     if ((itemp->code[0] & 0374) == 0370)
2221         return MOK_JUMP;
2222
2223     return MOK_GOOD;
2224 }
2225
/*
 * Compute the ModRM byte, the optional SIB byte and the displacement
 * width that encode operand `input` as an effective address.
 *
 *   input    - operand to encode.  For a pure offset in 64-bit code with
 *              no segment, its IP_REL type bits are cleared and MEMORY is
 *              set, with a warning.
 *   output   - receives modrm, sib_present, sib, bytes (displacement
 *              width in bytes), rip and size.  REX bits are OR'ed into
 *              output->rex and never cleared here, so the caller
 *              presumably has to initialise that field (TODO confirm at
 *              the call sites).
 *   bits     - current code size; only compared against 64 here
 *   addrbits - address size in effect; compared against 16, 32 and 64
 *   rfield   - value for the ModRM reg field (the low three bits are used)
 *   rflags   - register flags that go with rfield, used for the REX bits
 *
 * Returns `output` on success, or NULL if the operand cannot be encoded
 * as an effective address.
 */
static ea *process_ea(operand * input, ea * output, int bits,
                      int addrbits, int rfield, opflags_t rflags)
{
    /* Displacement-size hints carried by the operand's EA flags */
    bool byte_offs = !!(input->eaflags & EAF_BYTEOFFS);
    bool word_offs = !!(input->eaflags & EAF_WORDOFFS);
    bool no_offs   = !!(input->eaflags & EAF_NO_OFFS);

    output->rip = false;

    /* REX flags for the rfield operand */
    output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);

    if (is_class(REGISTER, input->type)) {  /* register direct */
        int i;
        opflags_t f;

        if (!is_register(input->basereg))
            return NULL;
        f = regflag(input);
        i = nasm_regvals[input->basereg];

        if (REG_EA & ~f)
            return NULL;        /* Invalid EA register */

        output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);

        output->sib_present = false;             /* no SIB necessary */
        output->bytes = 0;  /* no offset necessary either */
        /* mod = 3 (0xC0), reg = rfield, rm = low three bits of the register */
        output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
    } else {                    /* it's a memory reference */
        if (input->basereg == -1
            && (input->indexreg == -1 || input->scale == 0)) {
            /* it's a pure offset */

            if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
                input->segment == NO_SEG) {
                nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
                input->type &= ~IP_REL;
                input->type |= MEMORY;
            }

            if (input->eaflags & EAF_BYTEOFFS ||
                (input->eaflags & EAF_WORDOFFS &&
                 input->disp_size != (addrbits != 16 ? 32 : 16))) {
                nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
            }

            if (bits == 64 && (~input->type & IP_REL)) {
              /*
               * 64-bit code, operand not IP-relative: ModRM rm = 4 selects
               * a SIB byte, and the SIB uses index = 4, base = 5 together
               * with a 4-byte displacement.
               */
              int scale, index, base;
              output->sib_present = true;
              scale = 0;
              index = 4;
              base = 5;
              output->sib = (scale << 6) | (index << 3) | base;
              output->bytes = 4;
              output->modrm = 4 | ((rfield & 7) << 3);
              output->rip = false;
            } else {
              /*
               * ModRM-only form: rm = 6 with a 2-byte displacement for
               * 16-bit addressing, otherwise rm = 5 with a 4-byte
               * displacement.  In 64-bit code the result is flagged as
               * RIP-relative.
               */
              output->sib_present = false;
              output->bytes = (addrbits != 16 ? 4 : 2);
              output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
              output->rip = bits == 64;
            }
        } else {                /* it's an indirection */
            int i = input->indexreg, b = input->basereg, s = input->scale;
            int hb = input->hintbase, ht = input->hinttype;
            int t, it, bt;              /* register numbers */
            opflags_t x, ix, bx;        /* register flags */

            if (s == 0)
                i = -1;         /* make this easy, at least */

            /* Index register: value and flags, or -1 / 0 if absent */
            if (is_register(i)) {
                it = nasm_regvals[i];
                ix = nasm_reg_flags[i];
            } else {
                it = -1;
                ix = 0;
            }

            /* Base register: value and flags, or -1 / 0 if absent */
            if (is_register(b)) {
                bt = nasm_regvals[b];
                bx = nasm_reg_flags[b];
            } else {
                bt = -1;
                bx = 0;
            }

            /* check for a 32/64-bit memory reference... */
            if ((ix|bx) & (BITS32|BITS64)) {
                /* it must be a 32/64-bit memory reference. Firstly we have
                 * to check that all registers involved are type E/Rxx. */
                /* sok tracks which address sizes the registers still allow */
                int32_t sok = BITS32|BITS64;

                if (it != -1) {
                    if (!(REG64 & ~ix) || !(REG32 & ~ix))
                        sok &= ix;
                    else
                        return NULL;
                }

                if (bt != -1) {
                    if (REG_GPR & ~bx)
                        return NULL; /* Invalid register */
                    if (~sok & bx & SIZE_MASK)
                        return NULL; /* Invalid size */
                    sok &= bx;
                }

                /* While we're here, ensure the user didn't specify
                   WORD or QWORD. */
                if (input->disp_size == 16 || input->disp_size == 64)
                    return NULL;

                /* The register sizes must agree with the address size */
                if (addrbits == 16 ||
                    (addrbits == 32 && !(sok & BITS32)) ||
                    (addrbits == 64 && !(sok & BITS64)))
                    return NULL;

                /* now reorganize base/index */
                /*
                 * NOTE: the following steps depend on each other; each one
                 * sees the base/index/scale left behind by the previous.
                 */
                if (s == 1 && bt != it && bt != -1 && it != -1 &&
                    ((hb == b && ht == EAH_NOTBASE)
                     || (hb == i && ht == EAH_MAKEBASE))) {
                    /* swap if hints say so */
                    t = bt, bt = it, it = t;
                    x = bx, bx = ix, ix = x;
                }
                if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
                    bt = -1, bx = 0, s++;
                if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
                    /* make single reg base, unless hint */
                    bt = it, bx = ix, it = -1, ix = 0;
                }
                if (((s == 2 && it != REG_NUM_ESP
                      && !(input->eaflags & EAF_TIMESTWO)) || s == 3
                     || s == 5 || s == 9) && bt == -1)
                    bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
                if (it == -1 && (bt & 7) != REG_NUM_ESP
                    && (input->eaflags & EAF_TIMESTWO))
                    it = bt, ix = bx, bt = -1, bx = 0, s = 1;
                /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
                if (s == 1 && it == REG_NUM_ESP) {
                    /* swap ESP into base if scale is 1 */
                    t = it, it = bt, bt = t;
                    x = ix, ix = bx, bx = x;
                }
                if (it == REG_NUM_ESP
                    || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
                    return NULL;        /* wrong, for various reasons */

                /* REX.X comes from the index register, REX.B from the base */
                output->rex |= rexflags(it, ix, REX_X);
                output->rex |= rexflags(bt, bx, REX_B);

                if (it == -1 && (bt & 7) != REG_NUM_ESP) {
                    /* no SIB needed */
                    int mod, rm;

                    if (bt == -1) {
                        rm = 5;
                        mod = 0;
                    } else {
                        rm = (bt & 7);
                        /*
                         * mod 0: no displacement (never chosen when rm is
                         * REG_NUM_EBP); mod 1: one displacement byte;
                         * mod 2: four displacement bytes (see
                         * output->bytes below).
                         */
                        if (rm != REG_NUM_EBP &&
                            (no_offs || is_zero32(input, -1)) &&
                            !(byte_offs || word_offs))
                            mod = 0;
                        else if (byte_offs ||
                                 (! word_offs && is_sbyte32(input, -1)) ||
                                 (rm == REG_NUM_EBP && no_offs))
                            mod = 1;
                        else
                            mod = 2;
                    }

                    output->sib_present = false;
                    output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
                    output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
                } else {
                    /* we need a SIB */
                    int mod, scale, index, base;

                    if (it == -1)
                        index = 4, s = 1;
                    else
                        index = (it & 7);

                    /* Map the scale factor onto the 2-bit SIB scale field */
                    switch (s) {
                    case 1:
                        scale = 0;
                        break;
                    case 2:
                        scale = 1;
                        break;
                    case 4:
                        scale = 2;
                        break;
                    case 8:
                        scale = 3;
                        break;
                    default:   /* then what the smeg is it? */
                        return NULL;    /* panic */
                    }

                    if (bt == -1) {
                        base = 5;
                        mod = 0;
                    } else {
                        base = (bt & 7);
                        /* Same mod selection as in the no-SIB case above */
                        if (base != REG_NUM_EBP &&
                            (no_offs || is_zero32(input, -1)) &&
                            !(byte_offs || word_offs))
                            mod = 0;
                        else if (byte_offs ||
                                 (! word_offs && is_sbyte32(input, -1)) ||
                                 (base == REG_NUM_EBP && no_offs))
                            mod = 1;
                        else
                            mod = 2;
                    }

                    output->sib_present = true;
                    output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
                    output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
                    output->sib = (scale << 6) | (index << 3) | base;
                }
            } else {            /* it's 16-bit */
                int mod, rm;

                /* check for 64-bit long mode */
                if (addrbits == 64)
                    return NULL;

                /* check all registers are BX, BP, SI or DI */
                if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
                     && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
                                       && i != R_SI && i != R_DI))
                    return NULL;

                /* ensure the user didn't specify DWORD/QWORD */
                if (input->disp_size == 32 || input->disp_size == 64)
                    return NULL;

                if (s != 1 && i != -1)
                    return NULL;        /* no can do, in 16-bit EA */
                if (b == -1 && i != -1) {
                    int tmp = b;
                    b = i;
                    i = tmp;
                }               /* swap */
                if ((b == R_SI || b == R_DI) && i != -1) {
                    int tmp = b;
                    b = i;
                    i = tmp;
                }
                /* have BX/BP as base, SI/DI index */
                if (b == i)
                    return NULL;        /* shouldn't ever happen, in theory */
                if (i != -1 && b != -1 &&
                    (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
                    return NULL;        /* invalid combinations */
                if (b == -1)    /* pure offset: handled above */
                    return NULL;        /* so if it gets to here, panic! */

                /*
                 * Pick the rm field.  With an index present, i * 256 + b
                 * folds the (index, base) pair into one switch key.
                 */
                rm = -1;
                if (i != -1)
                    switch (i * 256 + b) {
                    case R_SI * 256 + R_BX:
                        rm = 0;
                        break;
                    case R_DI * 256 + R_BX:
                        rm = 1;
                        break;
                    case R_SI * 256 + R_BP:
                        rm = 2;
                        break;
                    case R_DI * 256 + R_BP:
                        rm = 3;
                        break;
                } else
                    switch (b) {
                    case R_SI:
                        rm = 4;
                        break;
                    case R_DI:
                        rm = 5;
                        break;
                    case R_BP:
                        rm = 6;
                        break;
                    case R_BX:
                        rm = 7;
                        break;
                    }
                if (rm == -1)   /* can't happen, in theory */
                    return NULL;        /* so panic if it does */

                /*
                 * mod doubles as the displacement width here (0, 1 or 2
                 * bytes); rm == 6 (plain BP) is never given mod 0.
                 */
                if (rm != 6 &&
                    (no_offs || is_zero16(input, -1)) &&
                    !(byte_offs || word_offs))
                    mod = 0;
                else if (byte_offs ||
                         (! word_offs && is_sbyte16(input, -1)) ||
                         (rm == 6 && no_offs))
                    mod = 1;
                else
                    mod = 2;

                output->sib_present = false;    /* no SIB - it's 16-bit */
                output->bytes = mod;    /* bytes of offset needed */
                output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
            }
        }
    }

    /* Total encoded length: ModRM + optional SIB + displacement bytes */
    output->size = 1 + output->sib_present + output->bytes;
    return output;
}
2543
2544 static void add_asp(insn *ins, int addrbits)
2545 {
2546     int j, valid;
2547     int defdisp;
2548
2549     valid = (addrbits == 64) ? 64|32 : 32|16;
2550
2551     switch (ins->prefixes[PPS_ASIZE]) {
2552     case P_A16:
2553         valid &= 16;
2554         break;
2555     case P_A32:
2556         valid &= 32;
2557         break;
2558     case P_A64:
2559         valid &= 64;
2560         break;
2561     case P_ASP:
2562         valid &= (addrbits == 32) ? 16 : 32;
2563         break;
2564     default:
2565         break;
2566     }
2567
2568     for (j = 0; j < ins->operands; j++) {
2569         if (is_class(MEMORY, ins->oprs[j].type)) {
2570             opflags_t i, b;
2571
2572             /* Verify as Register */
2573             if (!is_register(ins->oprs[j].indexreg))
2574                 i = 0;
2575             else
2576                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2577
2578             /* Verify as Register */
2579             if (!is_register(ins->oprs[j].basereg))
2580                 b = 0;
2581             else
2582                 b = nasm_reg_flags[ins->oprs[j].basereg];
2583
2584             if (ins->oprs[j].scale == 0)
2585                 i = 0;
2586
2587             if (!i && !b) {
2588                 int ds = ins->oprs[j].disp_size;
2589                 if ((addrbits != 64 && ds > 8) ||
2590                     (addrbits == 64 && ds == 16))
2591                     valid &= ds;
2592             } else {
2593                 if (!(REG16 & ~b))
2594                     valid &= 16;
2595                 if (!(REG32 & ~b))
2596                     valid &= 32;
2597                 if (!(REG64 & ~b))
2598                     valid &= 64;
2599
2600                 if (!(REG16 & ~i))
2601                     valid &= 16;
2602                 if (!(REG32 & ~i))
2603                     valid &= 32;
2604                 if (!(REG64 & ~i))
2605                     valid &= 64;
2606             }
2607         }
2608     }
2609
2610     if (valid & addrbits) {
2611         ins->addr_size = addrbits;
2612     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2613         /* Add an address size prefix */
2614         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2615         ins->prefixes[PPS_ASIZE] = pref;
2616         ins->addr_size = (addrbits == 32) ? 16 : 32;
2617     } else {
2618         /* Impossible... */
2619         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2620         ins->addr_size = addrbits; /* Error recovery */
2621     }
2622
2623     defdisp = ins->addr_size == 16 ? 16 : 32;
2624
2625     for (j = 0; j < ins->operands; j++) {
2626         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2627             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2628             != ins->addr_size) {
2629             /* mem_offs sizes must match the address size; if not,
2630                strip the MEM_OFFS bit and match only EA instructions */
2631             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2632         }
2633     }
2634 }