assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 0ww lpp
  96  *                 [w0] ww = 0 for W = 0
  97  *                 [w1] ww = 1 for W = 1
  98  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  99  *                 [ww] ww = 3 for W used as REX.W
 100  *
 101  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 102  *
 103  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 104  *                 which is to be extended to the operand size.
 105  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 106  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 107  * \312          - (disassembler only) invalid with non-default address size.
 108  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 109  * \314          - (disassembler only) invalid with REX.B
 110  * \315          - (disassembler only) invalid with REX.X
 111  * \316          - (disassembler only) invalid with REX.R
 112  * \317          - (disassembler only) invalid with REX.W
 113  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 114  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 115  * \322          - indicates that this instruction is only valid when the
 116  *                 operand size is the default (instruction to disassembler,
 117  *                 generates no code in the assembler)
 118  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 119  * \324          - indicates 64-bit operand size requiring REX prefix.
 120  * \325          - instruction which always uses spl/bpl/sil/dil
 121  * \330          - a literal byte follows in the code stream, to be added
 122  *                 to the condition code value of the instruction.
 123  * \331          - instruction not valid with REP prefix.  Hint for
 124  *                 disassembler only; for SSE instructions.
 125  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 126  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 127  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 128  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  129  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  130  * \337          - force a REPNE prefix (0xF2) even if not specified.
 131  *                 \336-\337 are still listed as prefixes in the disassembler.
 132  * \340          - reserve <operand 0> bytes of uninitialized storage.
 133  *                 Operand 0 had better be a segmentless constant.
 134  * \341          - this instruction needs a WAIT "prefix"
 135  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 136  *                 (POP is never used for CS) depending on operand 0
 137  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 138  *                 on operand 0
 139  * \360          - no SSE prefix (== \364\331)
 140  * \361          - 66 SSE prefix (== \366\331)
 141  * \362          - F2 SSE prefix (== \364\332)
 142  * \363          - F3 SSE prefix (== \364\333)
 143  * \364          - operand-size prefix (0x66) not permitted
 144  * \365          - address-size prefix (0x67) not permitted
 145  * \366          - operand-size prefix (0x66) used as opcode extension
 146  * \367          - address-size prefix (0x67) used as opcode extension
 147  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 148  *                 370 is used for Jcc, 371 is used for JMP.
 149  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 150  *                 used for conditional jump over longer jump
 151  */
 152
 153 #include "compiler.h"
 154
 155 #include <stdio.h>
 156 #include <string.h>
 157 #include <inttypes.h>
 158
 159 #include "nasm.h"
 160 #include "nasmlib.h"
 161 #include "assemble.h"
 162 #include "insns.h"
 163 #include "tables.h"
 164
 165 enum match_result {
 166     /*
 167      * Matching errors.  These should be sorted so that more specific
 168      * errors come later in the sequence.
 169      */
 170     MERR_INVALOP,
 171     MERR_OPSIZEMISSING,
 172     MERR_OPSIZEMISMATCH,
 173     MERR_BADCPU,
 174     MERR_BADMODE,
 175     /*
 176      * Matching success; the conditional ones first
 177      */
 178     MOK_JUMP,                   /* Matching OK but needs jmp_match() */
 179     MOK_GOOD                    /* Matching unconditionally OK */
 180 };
 181
 182 typedef struct {
 183     int sib_present;                 /* is a SIB byte necessary? */
 184     int bytes;                       /* # of bytes of offset needed */
 185     int size;                        /* lazy - this is sib+bytes+1 */
 186     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 187 } ea;
 188
 189 static uint32_t cpu;            /* cpu level received from nasm.c */
 190 static efunc errfunc;
 191 static struct ofmt *outfmt;
 192 static ListGen *list;
 193
 194 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 195 static void gencode(int32_t segment, int64_t offset, int bits,
 196                     insn * ins, const struct itemplate *temp,
 197                     int64_t insn_end);
 198 static enum match_result find_match(const struct itemplate **tempp,
 199                                     insn *instruction,
 200                                     int32_t segment, int64_t offset, int bits);
 201 static enum match_result matches(const struct itemplate *, insn *, int bits);
 202 static opflags_t regflag(const operand *);
 203 static int32_t regval(const operand *);
 204 static int rexflags(int, opflags_t, int);
 205 static int op_rexflags(const operand *, int);
 206 static ea *process_ea(operand *, ea *, int, int, int, opflags_t);
 207 static void add_asp(insn *, int);
 208
 209 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 210 {
 211     return ins->prefixes[pos] == prefix;
 212 }
 213
 214 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 215 {
 216     if (ins->prefixes[pos])
 217         errfunc(ERR_NONFATAL, "invalid %s prefix",
 218                 prefix_name(ins->prefixes[pos]));
 219 }
 220
 221 static const char *size_name(int size)
 222 {
 223     switch (size) {
 224     case 1:
 225         return "byte";
 226     case 2:
 227         return "word";
 228     case 4:
 229         return "dword";
 230     case 8:
 231         return "qword";
 232     case 10:
 233         return "tword";
 234     case 16:
 235         return "oword";
 236     case 32:
 237         return "yword";
 238     default:
 239         return "???";
 240     }
 241 }
 242
 243 static void warn_overflow(int pass, int size)
 244 {
 245     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 246             "%s data exceeds bounds", size_name(size));
 247 }
 248
 249 static void warn_overflow_const(int64_t data, int size)
 250 {
 251     if (overflow_general(data, size))
 252         warn_overflow(ERR_PASS1, size);
 253 }
 254
 255 static void warn_overflow_opd(const struct operand *o, int size)
 256 {
 257     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 258         if (overflow_general(o->offset, size))
 259             warn_overflow(ERR_PASS2, size);
 260     }
 261 }
 262
 263 /*
 264  * This routine wrappers the real output format's output routine,
 265  * in order to pass a copy of the data off to the listing file
 266  * generator at the same time.
 267  */
 268 static void out(int64_t offset, int32_t segto, const void *data,
 269                 enum out_type type, uint64_t size,
 270                 int32_t segment, int32_t wrt)
 271 {
 272     static int32_t lineno = 0;     /* static!!! */
 273     static char *lnfname = NULL;
 274     uint8_t p[8];
 275
 276     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 277         /*
 278          * This is a non-relocated address, and we're going to
 279          * convert it into RAWDATA format.
 280          */
 281         uint8_t *q = p;
 282
 283         if (size > 8) {
 284             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 285             return;
 286         }
 287
 288         WRITEADDR(q, *(int64_t *)data, size);
 289         data = p;
 290         type = OUT_RAWDATA;
 291     }
 292
 293     list->output(offset, data, type, size);
 294
 295     /*
 296      * this call to src_get determines when we call the
 297      * debug-format-specific "linenum" function
 298      * it updates lineno and lnfname to the current values
 299      * returning 0 if "same as last time", -2 if lnfname
 300      * changed, and the amount by which lineno changed,
 301      * if it did. thus, these variables must be static
 302      */
 303
 304     if (src_get(&lineno, &lnfname)) {
 305         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 306     }
 307
 308     outfmt->output(segto, data, type, size, segment, wrt);
 309 }
 310
 311 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 312                      insn * ins, const uint8_t *code)
 313 {
 314     int64_t isize;
 315     uint8_t c = code[0];
 316
 317     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 318         return false;
 319     if (!optimizing)
 320         return false;
 321     if (optimizing < 0 && c == 0371)
 322         return false;
 323
 324     isize = calcsize(segment, offset, bits, ins, code);
 325
 326     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 327         /* Be optimistic in pass 1 */
 328         return true;
 329
 330     if (ins->oprs[0].segment != segment)
 331         return false;
 332
 333     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 334     return (isize >= -128 && isize <= 127); /* is it byte size? */
 335 }
 336
 337 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 338                  insn * instruction, struct ofmt *output, efunc error,
 339                  ListGen * listgen)
 340 {
 341     const struct itemplate *temp;
 342     int j;
 343     enum match_result m;
 344     int64_t insn_end;
 345     int32_t itimes;
 346     int64_t start = offset;
 347     int64_t wsize;              /* size for DB etc. */
 348
 349     errfunc = error;            /* to pass to other functions */
 350     cpu = cp;
 351     outfmt = output;            /* likewise */
 352     list = listgen;             /* and again */
 353
 354     wsize = idata_bytes(instruction->opcode);
 355     if (wsize == -1)
 356         return 0;
 357
 358     if (wsize) {
 359         extop *e;
 360         int32_t t = instruction->times;
 361         if (t < 0)
 362             errfunc(ERR_PANIC,
 363                     "instruction->times < 0 (%ld) in assemble()", t);
 364
 365         while (t--) {           /* repeat TIMES times */
 366             list_for_each(e, instruction->eops) {
 367                 if (e->type == EOT_DB_NUMBER) {
 368                     if (wsize == 1) {
 369                         if (e->segment != NO_SEG)
 370                             errfunc(ERR_NONFATAL,
 371                                     "one-byte relocation attempted");
 372                         else {
 373                             uint8_t out_byte = e->offset;
 374                             out(offset, segment, &out_byte,
 375                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 376                         }
 377                     } else if (wsize > 8) {
 378                         errfunc(ERR_NONFATAL,
 379                                 "integer supplied to a DT, DO or DY"
 380                                 " instruction");
 381                     } else
 382                         out(offset, segment, &e->offset,
 383                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 384                     offset += wsize;
 385                 } else if (e->type == EOT_DB_STRING ||
 386                            e->type == EOT_DB_STRING_FREE) {
 387                     int align;
 388
 389                     out(offset, segment, e->stringval,
 390                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 391                     align = e->stringlen % wsize;
 392
 393                     if (align) {
 394                         align = wsize - align;
 395                         out(offset, segment, zero_buffer,
 396                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 397                     }
 398                     offset += e->stringlen + align;
 399                 }
 400             }
 401             if (t > 0 && t == instruction->times - 1) {
 402                 /*
 403                  * Dummy call to list->output to give the offset to the
 404                  * listing module.
 405                  */
 406                 list->output(offset, NULL, OUT_RAWDATA, 0);
 407                 list->uplevel(LIST_TIMES);
 408             }
 409         }
 410         if (instruction->times > 1)
 411             list->downlevel(LIST_TIMES);
 412         return offset - start;
 413     }
 414
 415     if (instruction->opcode == I_INCBIN) {
 416         const char *fname = instruction->eops->stringval;
 417         FILE *fp;
 418
 419         fp = fopen(fname, "rb");
 420         if (!fp) {
 421             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 422                   fname);
 423         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 424             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 425                   fname);
 426         } else {
 427             static char buf[4096];
 428             size_t t = instruction->times;
 429             size_t base = 0;
 430             size_t len;
 431
 432             len = ftell(fp);
 433             if (instruction->eops->next) {
 434                 base = instruction->eops->next->offset;
 435                 len -= base;
 436                 if (instruction->eops->next->next &&
 437                     len > (size_t)instruction->eops->next->next->offset)
 438                     len = (size_t)instruction->eops->next->next->offset;
 439             }
 440             /*
 441              * Dummy call to list->output to give the offset to the
 442              * listing module.
 443              */
 444             list->output(offset, NULL, OUT_RAWDATA, 0);
 445             list->uplevel(LIST_INCBIN);
 446             while (t--) {
 447                 size_t l;
 448
 449                 fseek(fp, base, SEEK_SET);
 450                 l = len;
 451                 while (l > 0) {
 452                     int32_t m;
 453                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 454                     if (!m) {
 455                         /*
 456                          * This shouldn't happen unless the file
 457                          * actually changes while we are reading
 458                          * it.
 459                          */
 460                         error(ERR_NONFATAL,
 461                               "`incbin': unexpected EOF while"
 462                               " reading file `%s'", fname);
 463                         t = 0;  /* Try to exit cleanly */
 464                         break;
 465                     }
 466                     out(offset, segment, buf, OUT_RAWDATA, m,
 467                         NO_SEG, NO_SEG);
 468                     l -= m;
 469                 }
 470             }
 471             list->downlevel(LIST_INCBIN);
 472             if (instruction->times > 1) {
 473                 /*
 474                  * Dummy call to list->output to give the offset to the
 475                  * listing module.
 476                  */
 477                 list->output(offset, NULL, OUT_RAWDATA, 0);
 478                 list->uplevel(LIST_TIMES);
 479                 list->downlevel(LIST_TIMES);
 480             }
 481             fclose(fp);
 482             return instruction->times * len;
 483         }
 484         return 0;               /* if we're here, there's an error */
 485     }
 486
 487     /* Check to see if we need an address-size prefix */
 488     add_asp(instruction, bits);
 489
 490     m = find_match(&temp, instruction, segment, offset, bits);
 491
 492     if (m == MOK_GOOD) {
 493         /* Matches! */
 494         int64_t insn_size = calcsize(segment, offset, bits,
 495                                      instruction, temp->code);
 496         itimes = instruction->times;
 497         if (insn_size < 0)  /* shouldn't be, on pass two */
 498             error(ERR_PANIC, "errors made it through from pass one");
 499         else
 500             while (itimes--) {
 501                 for (j = 0; j < MAXPREFIX; j++) {
 502                     uint8_t c = 0;
 503                     switch (instruction->prefixes[j]) {
 504                     case P_WAIT:
 505                         c = 0x9B;
 506                         break;
 507                     case P_LOCK:
 508                         c = 0xF0;
 509                         break;
 510                     case P_REPNE:
 511                     case P_REPNZ:
 512                         c = 0xF2;
 513                         break;
 514                     case P_REPE:
 515                     case P_REPZ:
 516                     case P_REP:
 517                         c = 0xF3;
 518                         break;
 519                     case R_CS:
 520                         if (bits == 64) {
 521                             error(ERR_WARNING | ERR_PASS2,
 522                                   "cs segment base generated, but will be ignored in 64-bit mode");
 523                         }
 524                         c = 0x2E;
 525                         break;
 526                     case R_DS:
 527                         if (bits == 64) {
 528                             error(ERR_WARNING | ERR_PASS2,
 529                                   "ds segment base generated, but will be ignored in 64-bit mode");
 530                         }
 531                         c = 0x3E;
 532                         break;
 533                     case R_ES:
 534                         if (bits == 64) {
 535                             error(ERR_WARNING | ERR_PASS2,
 536                                   "es segment base generated, but will be ignored in 64-bit mode");
 537                         }
 538                         c = 0x26;
 539                         break;
 540                     case R_FS:
 541                         c = 0x64;
 542                         break;
 543                     case R_GS:
 544                         c = 0x65;
 545                         break;
 546                     case R_SS:
 547                         if (bits == 64) {
 548                             error(ERR_WARNING | ERR_PASS2,
 549                                   "ss segment base generated, but will be ignored in 64-bit mode");
 550                         }
 551                         c = 0x36;
 552                         break;
 553                     case R_SEGR6:
 554                     case R_SEGR7:
 555                         error(ERR_NONFATAL,
 556                               "segr6 and segr7 cannot be used as prefixes");
 557                         break;
 558                     case P_A16:
 559                         if (bits == 64) {
 560                             error(ERR_NONFATAL,
 561                                   "16-bit addressing is not supported "
 562                                   "in 64-bit mode");
 563                         } else if (bits != 16)
 564                             c = 0x67;
 565                         break;
 566                     case P_A32:
 567                         if (bits != 32)
 568                             c = 0x67;
 569                         break;
 570                     case P_A64:
 571                         if (bits != 64) {
 572                             error(ERR_NONFATAL,
 573                                   "64-bit addressing is only supported "
 574                                   "in 64-bit mode");
 575                         }
 576                         break;
 577                     case P_ASP:
 578                         c = 0x67;
 579                         break;
 580                     case P_O16:
 581                         if (bits != 16)
 582                             c = 0x66;
 583                         break;
 584                     case P_O32:
 585                         if (bits == 16)
 586                             c = 0x66;
 587                         break;
 588                     case P_O64:
 589                         /* REX.W */
 590                         break;
 591                     case P_OSP:
 592                         c = 0x66;
 593                         break;
 594                     case P_none:
 595                         break;
 596                     default:
 597                         error(ERR_PANIC, "invalid instruction prefix");
 598                     }
 599                     if (c != 0) {
 600                         out(offset, segment, &c, OUT_RAWDATA, 1,
 601                             NO_SEG, NO_SEG);
 602                         offset++;
 603                     }
 604                 }
 605                 insn_end = offset + insn_size;
 606                 gencode(segment, offset, bits, instruction,
 607                         temp, insn_end);
 608                 offset += insn_size;
 609                 if (itimes > 0 && itimes == instruction->times - 1) {
 610                     /*
 611                      * Dummy call to list->output to give the offset to the
 612                      * listing module.
 613                      */
 614                     list->output(offset, NULL, OUT_RAWDATA, 0);
 615                     list->uplevel(LIST_TIMES);
 616                 }
 617             }
 618         if (instruction->times > 1)
 619             list->downlevel(LIST_TIMES);
 620         return offset - start;
 621     } else {
 622         /* No match */
 623         switch (m) {
 624         case MERR_OPSIZEMISSING:
 625             error(ERR_NONFATAL, "operation size not specified");
 626             break;
 627         case MERR_OPSIZEMISMATCH:
 628             error(ERR_NONFATAL, "mismatch in operand sizes");
 629             break;
 630         case MERR_BADCPU:
 631             error(ERR_NONFATAL, "no instruction for this cpu level");
 632             break;
 633         case MERR_BADMODE:
 634             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 635                   bits);
 636             break;
 637         default:
 638             error(ERR_NONFATAL,
 639                   "invalid combination of opcode and operands");
 640             break;
 641         }
 642     }
 643     return 0;
 644 }
 645
 646 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 647                   insn * instruction, efunc error)
 648 {
 649     const struct itemplate *temp;
 650     enum match_result m;
 651
 652     errfunc = error;            /* to pass to other functions */
 653     cpu = cp;
 654
 655     if (instruction->opcode == I_none)
 656         return 0;
 657
 658     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 659         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 660         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 661         instruction->opcode == I_DY) {
 662         extop *e;
 663         int32_t isize, osize, wsize;
 664
 665         isize = 0;
 666         wsize = idata_bytes(instruction->opcode);
 667
 668         list_for_each(e, instruction->eops) {
 669             int32_t align;
 670
 671             osize = 0;
 672             if (e->type == EOT_DB_NUMBER) {
 673                 osize = 1;
 674                 warn_overflow_const(e->offset, wsize);
 675             } else if (e->type == EOT_DB_STRING ||
 676                        e->type == EOT_DB_STRING_FREE)
 677                 osize = e->stringlen;
 678
 679             align = (-osize) % wsize;
 680             if (align < 0)
 681                 align += wsize;
 682             isize += osize + align;
 683         }
 684         return isize * instruction->times;
 685     }
 686
 687     if (instruction->opcode == I_INCBIN) {
 688         const char *fname = instruction->eops->stringval;
 689         FILE *fp;
 690         int64_t val = 0;
 691         size_t len;
 692
 693         fp = fopen(fname, "rb");
 694         if (!fp)
 695             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 696                   fname);
 697         else if (fseek(fp, 0L, SEEK_END) < 0)
 698             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 699                   fname);
 700         else {
 701             len = ftell(fp);
 702             if (instruction->eops->next) {
 703                 len -= instruction->eops->next->offset;
 704                 if (instruction->eops->next->next &&
 705                     len > (size_t)instruction->eops->next->next->offset) {
 706                     len = (size_t)instruction->eops->next->next->offset;
 707                 }
 708             }
 709             val = instruction->times * len;
 710         }
 711         if (fp)
 712             fclose(fp);
 713         return val;
 714     }
 715
 716     /* Check to see if we need an address-size prefix */
 717     add_asp(instruction, bits);
 718
 719     m = find_match(&temp, instruction, segment, offset, bits);
 720     if (m == MOK_GOOD) {
 721         /* we've matched an instruction. */
 722         int64_t isize;
 723         const uint8_t *codes = temp->code;
 724         int j;
 725
 726         isize = calcsize(segment, offset, bits, instruction, codes);
 727         if (isize < 0)
 728             return -1;
 729         for (j = 0; j < MAXPREFIX; j++) {
 730             switch (instruction->prefixes[j]) {
 731             case P_A16:
 732                 if (bits != 16)
 733                     isize++;
 734                 break;
 735             case P_A32:
 736                 if (bits != 32)
 737                     isize++;
 738                 break;
 739             case P_O16:
 740                 if (bits != 16)
 741                     isize++;
 742                 break;
 743             case P_O32:
 744                 if (bits == 16)
 745                     isize++;
 746                 break;
 747             case P_A64:
 748             case P_O64:
 749             case P_none:
 750                 break;
 751             default:
 752                 isize++;
 753                 break;
 754             }
 755         }
 756         return isize * instruction->times;
 757     } else {
 758         return -1;                  /* didn't match any instruction */
 759     }
 760 }
 761
 762 static bool possible_sbyte(operand *o)
 763 {
 764     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 765         !(o->opflags & OPFLAG_UNKNOWN) &&
 766         optimizing >= 0 && !(o->type & STRICT);
 767 }
 768
 769 /* check that opn[op]  is a signed byte of size 16 or 32 */
 770 static bool is_sbyte16(operand *o)
 771 {
 772     int16_t v;
 773
 774     if (!possible_sbyte(o))
 775         return false;
 776
 777     v = o->offset;
 778     return v >= -128 && v <= 127;
 779 }
 780
 781 static bool is_sbyte32(operand *o)
 782 {
 783     int32_t v;
 784
 785     if (!possible_sbyte(o))
 786         return false;
 787
 788     v = o->offset;
 789     return v >= -128 && v <= 127;
 790 }
 791
 792 /* Common construct */
 793 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 794
 795 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 796                         insn * ins, const uint8_t *codes)
 797 {
 798     int64_t length = 0;
 799     uint8_t c;
 800     int rex_mask = ~0;
 801     int op1, op2;
 802     struct operand *opx;
 803     uint8_t opex = 0;
 804
 805     ins->rex = 0;               /* Ensure REX is reset */
 806
 807     if (ins->prefixes[PPS_OSIZE] == P_O64)
 808         ins->rex |= REX_W;
 809
 810     (void)segment;              /* Don't warn that this parameter is unused */
 811     (void)offset;               /* Don't warn that this parameter is unused */
 812
 813     while (*codes) {
 814         c = *codes++;
 815         op1 = (c & 3) + ((opex & 1) << 2);
 816         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 817         opx = &ins->oprs[op1];
 818         opex = 0;               /* For the next iteration */
 819
 820         switch (c) {
 821         case 01:
 822         case 02:
 823         case 03:
 824         case 04:
 825             codes += c, length += c;
 826             break;
 827
 828         case 05:
 829         case 06:
 830         case 07:
 831             opex = c;
 832             break;
 833
 834         case4(010):
 835             ins->rex |=
 836                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 837             codes++, length++;
 838             break;
 839
 840         case4(014):
 841         case4(020):
 842         case4(024):
 843             length++;
 844             break;
 845
 846         case4(030):
 847             length += 2;
 848             break;
 849
 850         case4(034):
 851             if (opx->type & (BITS16 | BITS32 | BITS64))
 852                 length += (opx->type & BITS16) ? 2 : 4;
 853             else
 854                 length += (bits == 16) ? 2 : 4;
 855             break;
 856
 857         case4(040):
 858             length += 4;
 859             break;
 860
 861         case4(044):
 862             length += ins->addr_size >> 3;
 863             break;
 864
 865         case4(050):
 866             length++;
 867             break;
 868
 869         case4(054):
 870             length += 8; /* MOV reg64/imm */
 871             break;
 872
 873         case4(060):
 874             length += 2;
 875             break;
 876
 877         case4(064):
 878             if (opx->type & (BITS16 | BITS32 | BITS64))
 879                 length += (opx->type & BITS16) ? 2 : 4;
 880             else
 881                 length += (bits == 16) ? 2 : 4;
 882             break;
 883
 884         case4(070):
 885             length += 4;
 886             break;
 887
 888         case4(074):
 889             length += 2;
 890             break;
 891
 892         case4(0140):
 893             length += is_sbyte16(opx) ? 1 : 2;
 894             break;
 895
 896         case4(0144):
 897             codes++;
 898             length++;
 899             break;
 900
 901         case4(0150):
 902             length += is_sbyte32(opx) ? 1 : 4;
 903             break;
 904
 905         case4(0154):
 906             codes++;
 907             length++;
 908             break;
 909
 910         case4(0160):
 911             length++;
 912             ins->rex |= REX_D;
 913             ins->drexdst = regval(opx);
 914             break;
 915
 916         case4(0164):
 917             length++;
 918             ins->rex |= REX_D|REX_OC;
 919             ins->drexdst = regval(opx);
 920             break;
 921
 922         case 0171:
 923             break;
 924
 925         case 0172:
 926         case 0173:
 927         case 0174:
 928             codes++;
 929             length++;
 930             break;
 931
 932         case4(0250):
 933             length += is_sbyte32(opx) ? 1 : 4;
 934             break;
 935
 936         case4(0254):
 937             length += 4;
 938             break;
 939
 940         case4(0260):
 941             ins->rex |= REX_V;
 942             ins->drexdst = regval(opx);
 943             ins->vex_cm = *codes++;
 944             ins->vex_wlp = *codes++;
 945             break;
 946
 947         case 0270:
 948             ins->rex |= REX_V;
 949             ins->drexdst = 0;
 950             ins->vex_cm = *codes++;
 951             ins->vex_wlp = *codes++;
 952             break;
 953
 954         case4(0274):
 955             length++;
 956             break;
 957
 958         case4(0300):
 959             break;
 960
 961         case 0310:
 962             if (bits == 64)
 963                 return -1;
 964             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 965             break;
 966
 967         case 0311:
 968             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 969             break;
 970
 971         case 0312:
 972             break;
 973
 974         case 0313:
 975             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 976                 has_prefix(ins, PPS_ASIZE, P_A32))
 977                 return -1;
 978             break;
 979
 980         case4(0314):
 981             break;
 982
 983         case 0320:
 984             length += (bits != 16);
 985             break;
 986
 987         case 0321:
 988             length += (bits == 16);
 989             break;
 990
 991         case 0322:
 992             break;
 993
 994         case 0323:
 995             rex_mask &= ~REX_W;
 996             break;
 997
 998         case 0324:
 999             ins->rex |= REX_W;
1000             break;
1001
1002         case 0325:
1003             ins->rex |= REX_NH;
1004             break;
1005
1006         case 0330:
1007             codes++, length++;
1008             break;
1009
1010         case 0331:
1011             break;
1012
1013         case 0332:
1014         case 0333:
1015             length++;
1016             break;
1017
1018         case 0334:
1019             ins->rex |= REX_L;
1020             break;
1021
1022         case 0335:
1023             break;
1024
1025         case 0336:
1026             if (!ins->prefixes[PPS_LREP])
1027                 ins->prefixes[PPS_LREP] = P_REP;
1028             break;
1029
1030         case 0337:
1031             if (!ins->prefixes[PPS_LREP])
1032                 ins->prefixes[PPS_LREP] = P_REPNE;
1033             break;
1034
1035         case 0340:
1036             if (ins->oprs[0].segment != NO_SEG)
1037                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1038                         " quantity of BSS space");
1039             else
1040                 length += ins->oprs[0].offset;
1041             break;
1042
1043         case 0341:
1044             if (!ins->prefixes[PPS_WAIT])
1045                 ins->prefixes[PPS_WAIT] = P_WAIT;
1046             break;
1047
1048         case4(0344):
1049             length++;
1050             break;
1051
1052         case 0360:
1053             break;
1054
1055         case 0361:
1056         case 0362:
1057         case 0363:
1058             length++;
1059             break;
1060
1061         case 0364:
1062         case 0365:
1063             break;
1064
1065         case 0366:
1066         case 0367:
1067             length++;
1068             break;
1069
1070         case 0370:
1071         case 0371:
1072         case 0372:
1073             break;
1074
1075         case 0373:
1076             length++;
1077             break;
1078
1079         case4(0100):
1080         case4(0110):
1081         case4(0120):
1082         case4(0130):
1083         case4(0200):
1084         case4(0204):
1085         case4(0210):
1086         case4(0214):
1087         case4(0220):
1088         case4(0224):
1089         case4(0230):
1090         case4(0234):
1091             {
1092                 ea ea_data;
1093                 int rfield;
1094                 opflags_t rflags;
1095                 struct operand *opy = &ins->oprs[op2];
1096
1097                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1098
1099                 if (c <= 0177) {
1100                     /* pick rfield from operand b (opx) */
1101                     rflags = regflag(opx);
1102                     rfield = nasm_regvals[opx->basereg];
1103                 } else {
1104                     rflags = 0;
1105                     rfield = c & 7;
1106                 }
1107                 if (!process_ea(opy, &ea_data, bits,
1108                                 ins->addr_size, rfield, rflags)) {
1109                     errfunc(ERR_NONFATAL, "invalid effective address");
1110                     return -1;
1111                 } else {
1112                     ins->rex |= ea_data.rex;
1113                     length += ea_data.size;
1114                 }
1115             }
1116             break;
1117
1118         default:
1119             errfunc(ERR_PANIC, "internal instruction table corrupt"
1120                     ": instruction code \\%o (0x%02X) given", c, c);
1121             break;
1122         }
1123     }
1124
1125     ins->rex &= rex_mask;
1126
1127     if (ins->rex & REX_NH) {
1128         if (ins->rex & REX_H) {
1129             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1130             return -1;
1131         }
1132         ins->rex &= ~REX_P;     /* Don't force REX prefix due to high reg */
1133     }
1134
1135     if (ins->rex & REX_V) {
1136         int bad32 = REX_R|REX_W|REX_X|REX_B;
1137
1138         if (ins->rex & REX_H) {
1139             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1140             return -1;
1141         }
1142         switch (ins->vex_wlp & 030) {
1143         case 000:
1144         case 020:
1145             ins->rex &= ~REX_W;
1146             break;
1147         case 010:
1148             ins->rex |= REX_W;
1149             bad32 &= ~REX_W;
1150             break;
1151         case 030:
1152             /* Follow REX_W */
1153             break;
1154         }
1155
1156         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1157             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1158             return -1;
1159         }
1160         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1161             length += 3;
1162         else
1163             length += 2;
1164     } else if (ins->rex & REX_D) {
1165         if (ins->rex & REX_H) {
1166             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1167             return -1;
1168         }
1169         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1170                            ins->drexdst > 7)) {
1171             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1172             return -1;
1173         }
1174         length++;
1175     } else if (ins->rex & REX_REAL) {
1176         if (ins->rex & REX_H) {
1177             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1178             return -1;
1179         } else if (bits == 64) {
1180             length++;
1181         } else if ((ins->rex & REX_L) &&
1182                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1183                    cpu >= IF_X86_64) {
1184             /* LOCK-as-REX.R */
1185             assert_no_prefix(ins, PPS_LREP);
1186             length++;
1187         } else {
1188             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1189             return -1;
1190         }
1191     }
1192
1193     return length;
1194 }
1195
/*
 * Emit a pending REX prefix byte, if one is due.
 *
 * Expands in a context that has `ins`, `bits`, `offset` and `segment`
 * in scope.  When the instruction is not DREX/VEX encoded, at least one
 * REX_REAL bit is set in ins->rex and the mode is 64-bit, the REX_REAL
 * bits (with REX_P forced on) are written as one raw byte via out(),
 * ins->rex is cleared so a later expansion does not emit it again
 * unless bits are set anew, and offset is advanced by one.
 *
 * Wrapped in do { } while (0) so that `EMIT_REX();` is a single
 * statement and is safe in an unbraced if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&     \
            (bits == 64)) {                                             \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1203
1204 static void gencode(int32_t segment, int64_t offset, int bits,
1205                     insn * ins, const struct itemplate *temp,
1206                     int64_t insn_end)
1207 {
1208     static char condval[] = {   /* conditional opcodes */
1209         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1210         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1211         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1212     };
1213     uint8_t c;
1214     uint8_t bytes[4];
1215     int64_t size;
1216     int64_t data;
1217     int op1, op2;
1218     struct operand *opx;
1219     const uint8_t *codes = temp->code;
1220     uint8_t opex = 0;
1221
1222     while (*codes) {
1223         c = *codes++;
1224         op1 = (c & 3) + ((opex & 1) << 2);
1225         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1226         opx = &ins->oprs[op1];
1227         opex = 0;               /* For the next iteration */
1228
1229         switch (c) {
1230         case 01:
1231         case 02:
1232         case 03:
1233         case 04:
1234             EMIT_REX();
1235             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1236             codes += c;
1237             offset += c;
1238             break;
1239
1240         case 05:
1241         case 06:
1242         case 07:
1243             opex = c;
1244             break;
1245
1246         case4(010):
1247             EMIT_REX();
1248             bytes[0] = *codes++ + (regval(opx) & 7);
1249             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1250             offset += 1;
1251             break;
1252
1253         case4(014):
1254             /* The test for BITS8 and SBYTE here is intended to avoid
1255                warning on optimizer actions due to SBYTE, while still
1256                warn on explicit BYTE directives.  Also warn, obviously,
1257                if the optimizer isn't enabled. */
1258             if (((opx->type & BITS8) ||
1259                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1260                 (opx->offset < -128 || opx->offset > 127)) {
1261                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1262                         "signed byte value exceeds bounds");
1263             }
1264             if (opx->segment != NO_SEG) {
1265                 data = opx->offset;
1266                 out(offset, segment, &data, OUT_ADDRESS, 1,
1267                     opx->segment, opx->wrt);
1268             } else {
1269                 bytes[0] = opx->offset;
1270                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1271                     NO_SEG);
1272             }
1273             offset += 1;
1274             break;
1275
1276         case4(020):
1277             if (opx->offset < -256 || opx->offset > 255) {
1278                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1279                         "byte value exceeds bounds");
1280             }
1281             if (opx->segment != NO_SEG) {
1282                 data = opx->offset;
1283                 out(offset, segment, &data, OUT_ADDRESS, 1,
1284                     opx->segment, opx->wrt);
1285             } else {
1286                 bytes[0] = opx->offset;
1287                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1288                     NO_SEG);
1289             }
1290             offset += 1;
1291             break;
1292
1293         case4(024):
1294             if (opx->offset < 0 || opx->offset > 255)
1295                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1296                         "unsigned byte value exceeds bounds");
1297             if (opx->segment != NO_SEG) {
1298                 data = opx->offset;
1299                 out(offset, segment, &data, OUT_ADDRESS, 1,
1300                     opx->segment, opx->wrt);
1301             } else {
1302                 bytes[0] = opx->offset;
1303                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1304                     NO_SEG);
1305             }
1306             offset += 1;
1307             break;
1308
1309         case4(030):
1310             warn_overflow_opd(opx, 2);
1311             data = opx->offset;
1312             out(offset, segment, &data, OUT_ADDRESS, 2,
1313                 opx->segment, opx->wrt);
1314             offset += 2;
1315             break;
1316
1317         case4(034):
1318             if (opx->type & (BITS16 | BITS32))
1319                 size = (opx->type & BITS16) ? 2 : 4;
1320             else
1321                 size = (bits == 16) ? 2 : 4;
1322             warn_overflow_opd(opx, size);
1323             data = opx->offset;
1324             out(offset, segment, &data, OUT_ADDRESS, size,
1325                 opx->segment, opx->wrt);
1326             offset += size;
1327             break;
1328
1329         case4(040):
1330             warn_overflow_opd(opx, 4);
1331             data = opx->offset;
1332             out(offset, segment, &data, OUT_ADDRESS, 4,
1333                 opx->segment, opx->wrt);
1334             offset += 4;
1335             break;
1336
1337         case4(044):
1338             data = opx->offset;
1339             size = ins->addr_size >> 3;
1340             warn_overflow_opd(opx, size);
1341             out(offset, segment, &data, OUT_ADDRESS, size,
1342                 opx->segment, opx->wrt);
1343             offset += size;
1344             break;
1345
1346         case4(050):
1347             if (opx->segment != segment)
1348                 errfunc(ERR_NONFATAL,
1349                         "short relative jump outside segment");
1350             data = opx->offset - insn_end;
1351             if (data > 127 || data < -128)
1352                 errfunc(ERR_NONFATAL, "short jump is out of range");
1353             bytes[0] = data;
1354             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1355             offset += 1;
1356             break;
1357
1358         case4(054):
1359             data = (int64_t)opx->offset;
1360             out(offset, segment, &data, OUT_ADDRESS, 8,
1361                 opx->segment, opx->wrt);
1362             offset += 8;
1363             break;
1364
1365         case4(060):
1366             if (opx->segment != segment) {
1367                 data = opx->offset;
1368                 out(offset, segment, &data,
1369                     OUT_REL2ADR, insn_end - offset,
1370                     opx->segment, opx->wrt);
1371             } else {
1372                 data = opx->offset - insn_end;
1373                 out(offset, segment, &data,
1374                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1375             }
1376             offset += 2;
1377             break;
1378
1379         case4(064):
1380             if (opx->type & (BITS16 | BITS32 | BITS64))
1381                 size = (opx->type & BITS16) ? 2 : 4;
1382             else
1383                 size = (bits == 16) ? 2 : 4;
1384             if (opx->segment != segment) {
1385                 data = opx->offset;
1386                 out(offset, segment, &data,
1387                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1388                     insn_end - offset, opx->segment, opx->wrt);
1389             } else {
1390                 data = opx->offset - insn_end;
1391                 out(offset, segment, &data,
1392                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1393             }
1394             offset += size;
1395             break;
1396
1397         case4(070):
1398             if (opx->segment != segment) {
1399                 data = opx->offset;
1400                 out(offset, segment, &data,
1401                     OUT_REL4ADR, insn_end - offset,
1402                     opx->segment, opx->wrt);
1403             } else {
1404                 data = opx->offset - insn_end;
1405                 out(offset, segment, &data,
1406                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1407             }
1408             offset += 4;
1409             break;
1410
1411         case4(074):
1412             if (opx->segment == NO_SEG)
1413                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1414                         " relocatable");
1415             data = 0;
1416             out(offset, segment, &data, OUT_ADDRESS, 2,
1417                 outfmt->segbase(1 + opx->segment),
1418                 opx->wrt);
1419             offset += 2;
1420             break;
1421
1422         case4(0140):
1423             data = opx->offset;
1424             warn_overflow_opd(opx, 2);
1425             if (is_sbyte16(opx)) {
1426                 bytes[0] = data;
1427                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1428                     NO_SEG);
1429                 offset++;
1430             } else {
1431                 out(offset, segment, &data, OUT_ADDRESS, 2,
1432                     opx->segment, opx->wrt);
1433                 offset += 2;
1434             }
1435             break;
1436
1437         case4(0144):
1438             EMIT_REX();
1439             bytes[0] = *codes++;
1440             if (is_sbyte16(opx))
1441                 bytes[0] |= 2;  /* s-bit */
1442             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1443             offset++;
1444             break;
1445
1446         case4(0150):
1447             data = opx->offset;
1448             warn_overflow_opd(opx, 4);
1449             if (is_sbyte32(opx)) {
1450                 bytes[0] = data;
1451                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1452                     NO_SEG);
1453                 offset++;
1454             } else {
1455                 out(offset, segment, &data, OUT_ADDRESS, 4,
1456                     opx->segment, opx->wrt);
1457                 offset += 4;
1458             }
1459             break;
1460
1461         case4(0154):
1462             EMIT_REX();
1463             bytes[0] = *codes++;
1464             if (is_sbyte32(opx))
1465                 bytes[0] |= 2;  /* s-bit */
1466             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1467             offset++;
1468             break;
1469
1470         case4(0160):
1471         case4(0164):
1472             break;
1473
1474         case 0171:
1475             bytes[0] =
1476                 (ins->drexdst << 4) |
1477                 (ins->rex & REX_OC ? 0x08 : 0) |
1478                 (ins->rex & (REX_R|REX_X|REX_B));
1479             ins->rex = 0;
1480             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1481             offset++;
1482             break;
1483
1484         case 0172:
1485             c = *codes++;
1486             opx = &ins->oprs[c >> 3];
1487             bytes[0] = nasm_regvals[opx->basereg] << 4;
1488             opx = &ins->oprs[c & 7];
1489             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1490                 errfunc(ERR_NONFATAL,
1491                         "non-absolute expression not permitted as argument %d",
1492                         c & 7);
1493             } else {
1494                 if (opx->offset & ~15) {
1495                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1496                             "four-bit argument exceeds bounds");
1497                 }
1498                 bytes[0] |= opx->offset & 15;
1499             }
1500             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1501             offset++;
1502             break;
1503
1504         case 0173:
1505             c = *codes++;
1506             opx = &ins->oprs[c >> 4];
1507             bytes[0] = nasm_regvals[opx->basereg] << 4;
1508             bytes[0] |= c & 15;
1509             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1510             offset++;
1511             break;
1512
1513         case 0174:
1514             c = *codes++;
1515             opx = &ins->oprs[c];
1516             bytes[0] = nasm_regvals[opx->basereg] << 4;
1517             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1518             offset++;
1519             break;
1520
1521         case4(0250):
1522             data = opx->offset;
1523             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1524                 (int32_t)data != (int64_t)data) {
1525                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1526                         "signed dword immediate exceeds bounds");
1527             }
1528             if (is_sbyte32(opx)) {
1529                 bytes[0] = data;
1530                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1531                     NO_SEG);
1532                 offset++;
1533             } else {
1534                 out(offset, segment, &data, OUT_ADDRESS, 4,
1535                     opx->segment, opx->wrt);
1536                 offset += 4;
1537             }
1538             break;
1539
1540         case4(0254):
1541             data = opx->offset;
1542             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1543                 (int32_t)data != (int64_t)data) {
1544                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1545                         "signed dword immediate exceeds bounds");
1546             }
1547             out(offset, segment, &data, OUT_ADDRESS, 4,
1548                 opx->segment, opx->wrt);
1549             offset += 4;
1550             break;
1551
1552         case4(0260):
1553         case 0270:
1554             codes += 2;
1555             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1556                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1557                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1558                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1559                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1560                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1561                 offset += 3;
1562             } else {
1563                 bytes[0] = 0xc5;
1564                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1565                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1566                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1567                 offset += 2;
1568             }
1569             break;
1570
1571         case4(0274):
1572         {
1573             uint64_t uv, um;
1574             int s;
1575
1576             if (ins->rex & REX_W)
1577                 s = 64;
1578             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1579                 s = 16;
1580             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1581                 s = 32;
1582             else
1583                 s = bits;
1584
1585             um = (uint64_t)2 << (s-1);
1586             uv = opx->offset;
1587
1588             if (uv > 127 && uv < (uint64_t)-128 &&
1589                 (uv < um-128 || uv > um-1)) {
1590                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1591                         "signed byte value exceeds bounds");
1592             }
1593             if (opx->segment != NO_SEG) {
1594                 data = uv;
1595                 out(offset, segment, &data, OUT_ADDRESS, 1,
1596                     opx->segment, opx->wrt);
1597             } else {
1598                 bytes[0] = uv;
1599                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1600                     NO_SEG);
1601             }
1602             offset += 1;
1603             break;
1604         }
1605
1606         case4(0300):
1607             break;
1608
1609         case 0310:
1610             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1611                 *bytes = 0x67;
1612                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1613                 offset += 1;
1614             } else
1615                 offset += 0;
1616             break;
1617
1618         case 0311:
1619             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1620                 *bytes = 0x67;
1621                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1622                 offset += 1;
1623             } else
1624                 offset += 0;
1625             break;
1626
1627         case 0312:
1628             break;
1629
1630         case 0313:
1631             ins->rex = 0;
1632             break;
1633
1634         case4(0314):
1635             break;
1636
1637         case 0320:
1638             if (bits != 16) {
1639                 *bytes = 0x66;
1640                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1641                 offset += 1;
1642             } else
1643                 offset += 0;
1644             break;
1645
1646         case 0321:
1647             if (bits == 16) {
1648                 *bytes = 0x66;
1649                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1650                 offset += 1;
1651             } else
1652                 offset += 0;
1653             break;
1654
1655         case 0322:
1656         case 0323:
1657             break;
1658
1659         case 0324:
1660             ins->rex |= REX_W;
1661             break;
1662
1663         case 0325:
1664             break;
1665
1666         case 0330:
1667             *bytes = *codes++ ^ condval[ins->condition];
1668             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1669             offset += 1;
1670             break;
1671
1672         case 0331:
1673             break;
1674
1675         case 0332:
1676         case 0333:
1677             *bytes = c - 0332 + 0xF2;
1678             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1679             offset += 1;
1680             break;
1681
1682         case 0334:
1683             if (ins->rex & REX_R) {
1684                 *bytes = 0xF0;
1685                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1686                 offset += 1;
1687             }
1688             ins->rex &= ~(REX_L|REX_R);
1689             break;
1690
1691         case 0335:
1692             break;
1693
1694         case 0336:
1695         case 0337:
1696             break;
1697
1698         case 0340:
1699             if (ins->oprs[0].segment != NO_SEG)
1700                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1701             else {
1702                 int64_t size = ins->oprs[0].offset;
1703                 if (size > 0)
1704                     out(offset, segment, NULL,
1705                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1706                 offset += size;
1707             }
1708             break;
1709
1710         case 0341:
1711             break;
1712
1713         case 0344:
1714         case 0345:
1715             bytes[0] = c & 1;
1716             switch (ins->oprs[0].basereg) {
1717             case R_CS:
1718                 bytes[0] += 0x0E;
1719                 break;
1720             case R_DS:
1721                 bytes[0] += 0x1E;
1722                 break;
1723             case R_ES:
1724                 bytes[0] += 0x06;
1725                 break;
1726             case R_SS:
1727                 bytes[0] += 0x16;
1728                 break;
1729             default:
1730                 errfunc(ERR_PANIC,
1731                         "bizarre 8086 segment register received");
1732             }
1733             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1734             offset++;
1735             break;
1736
1737         case 0346:
1738         case 0347:
1739             bytes[0] = c & 1;
1740             switch (ins->oprs[0].basereg) {
1741             case R_FS:
1742                 bytes[0] += 0xA0;
1743                 break;
1744             case R_GS:
1745                 bytes[0] += 0xA8;
1746                 break;
1747             default:
1748                 errfunc(ERR_PANIC,
1749                         "bizarre 386 segment register received");
1750             }
1751             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1752             offset++;
1753             break;
1754
1755         case 0360:
1756             break;
1757
1758         case 0361:
1759             bytes[0] = 0x66;
1760             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1761             offset += 1;
1762             break;
1763
1764         case 0362:
1765         case 0363:
1766             bytes[0] = c - 0362 + 0xf2;
1767             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1768             offset += 1;
1769             break;
1770
1771         case 0364:
1772         case 0365:
1773             break;
1774
1775         case 0366:
1776         case 0367:
1777             *bytes = c - 0366 + 0x66;
1778             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1779             offset += 1;
1780             break;
1781
1782         case 0370:
1783         case 0371:
1784         case 0372:
1785             break;
1786
1787         case 0373:
1788             *bytes = bits == 16 ? 3 : 5;
1789             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1790             offset += 1;
1791             break;
1792
1793         case4(0100):
1794         case4(0110):
1795         case4(0120):
1796         case4(0130):
1797         case4(0200):
1798         case4(0204):
1799         case4(0210):
1800         case4(0214):
1801         case4(0220):
1802         case4(0224):
1803         case4(0230):
1804         case4(0234):
1805             {
1806                 ea ea_data;
1807                 int rfield;
1808                 opflags_t rflags;
1809                 uint8_t *p;
1810                 int32_t s;
1811                 enum out_type type;
1812                 struct operand *opy = &ins->oprs[op2];
1813
1814                 if (c <= 0177) {
1815                     /* pick rfield from operand b (opx) */
1816                     rflags = regflag(opx);
1817                     rfield = nasm_regvals[opx->basereg];
1818                 } else {
1819                     /* rfield is constant */
1820                     rflags = 0;
1821                     rfield = c & 7;
1822                 }
1823
1824                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1825                                 rfield, rflags)) {
1826                     errfunc(ERR_NONFATAL, "invalid effective address");
1827                 }
1828
1829
1830                 p = bytes;
1831                 *p++ = ea_data.modrm;
1832                 if (ea_data.sib_present)
1833                     *p++ = ea_data.sib;
1834
1835                 /* DREX suffixes come between the SIB and the displacement */
1836                 if (ins->rex & REX_D) {
1837                     *p++ = (ins->drexdst << 4) |
1838                            (ins->rex & REX_OC ? 0x08 : 0) |
1839                            (ins->rex & (REX_R|REX_X|REX_B));
1840                     ins->rex = 0;
1841                 }
1842
1843                 s = p - bytes;
1844                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1845
1846                 /*
1847                  * Make sure the address gets the right offset in case
1848                  * the line breaks in the .lst file (BR 1197827)
1849                  */
1850                 offset += s;
1851                 s = 0;
1852
1853                 switch (ea_data.bytes) {
1854                 case 0:
1855                     break;
1856                 case 1:
1857                 case 2:
1858                 case 4:
1859                 case 8:
1860                     data = opy->offset;
1861                     warn_overflow_opd(opy, ea_data.bytes);
1862                     s += ea_data.bytes;
1863                     if (ea_data.rip) {
1864                         if (opy->segment == segment) {
1865                             data -= insn_end;
1866                             out(offset, segment, &data, OUT_ADDRESS,
1867                                 ea_data.bytes, NO_SEG, NO_SEG);
1868                         } else {
1869                             out(offset, segment, &data, OUT_REL4ADR,
1870                                 insn_end - offset, opy->segment, opy->wrt);
1871                         }
1872                     } else {
1873                         type = OUT_ADDRESS;
1874                         out(offset, segment, &data, OUT_ADDRESS,
1875                             ea_data.bytes, opy->segment, opy->wrt);
1876                     }
1877                     break;
1878                 default:
1879                     /* Impossible! */
1880                     errfunc(ERR_PANIC,
1881                             "Invalid amount of bytes (%d) for offset?!",
1882                             ea_data.bytes);
1883                     break;
1884                 }
1885                 offset += s;
1886             }
1887             break;
1888
1889         default:
1890             errfunc(ERR_PANIC, "internal instruction table corrupt"
1891                     ": instruction code \\%o (0x%02X) given", c, c);
1892             break;
1893         }
1894     }
1895 }
1896
1897 static opflags_t regflag(const operand * o)
1898 {
1899     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1900         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1901     }
1902     return nasm_reg_flags[o->basereg];
1903 }
1904
1905 static int32_t regval(const operand * o)
1906 {
1907     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1908         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1909     }
1910     return nasm_regvals[o->basereg];
1911 }
1912
1913 static int op_rexflags(const operand * o, int mask)
1914 {
1915     opflags_t flags;
1916     int val;
1917
1918     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1919         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1920     }
1921
1922     flags = nasm_reg_flags[o->basereg];
1923     val = nasm_regvals[o->basereg];
1924
1925     return rexflags(val, flags, mask);
1926 }
1927
1928 static int rexflags(int val, opflags_t flags, int mask)
1929 {
1930     int rex = 0;
1931
1932     if (val >= 8)
1933         rex |= REX_B|REX_X|REX_R;
1934     if (flags & BITS64)
1935         rex |= REX_W;
1936     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1937         rex |= REX_H;
1938     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1939         rex |= REX_P;
1940
1941     return rex & mask;
1942 }
1943
1944 static enum match_result find_match(const struct itemplate **tempp,
1945                                     insn *instruction,
1946                                     int32_t segment, int64_t offset, int bits)
1947 {
1948     const struct itemplate *temp;
1949     enum match_result m, merr;
1950     opflags_t xsizeflags[MAX_OPERANDS];
1951     bool opsizemissing = false;
1952     int i;
1953
1954     for (i = 0; i < instruction->operands; i++)
1955         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1956
1957     merr = MERR_INVALOP;
1958
1959     for (temp = nasm_instructions[instruction->opcode];
1960          temp->opcode != I_none; temp++) {
1961         m = matches(temp, instruction, bits);
1962         if (m == MOK_JUMP) {
1963             if (jmp_match(segment, offset, bits, instruction, temp->code))
1964                 m = MOK_GOOD;
1965             else
1966                 m = MERR_INVALOP;
1967         } else if (m == MERR_OPSIZEMISSING &&
1968                    (temp->flags & IF_SMASK) != IF_SX) {
1969             /*
1970              * Missing operand size and a candidate for fuzzy matching...
1971              */
1972             for (i = 0; i < temp->operands; i++) {
1973                 if ((temp->opd[i] & SAME_AS) == 0)
1974                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1975             }
1976             opsizemissing = true;
1977         }
1978         if (m > merr)
1979             merr = m;
1980         if (merr == MOK_GOOD)
1981             goto done;
1982     }
1983
1984     /* No match, but see if we can get a fuzzy operand size match... */
1985     if (!opsizemissing)
1986         goto done;
1987
1988     for (i = 0; i < instruction->operands; i++) {
1989         /*
1990          * We ignore extrinsic operand sizes on registers, so we should
1991          * never try to fuzzy-match on them.  This also resolves the case
1992          * when we have e.g. "xmmrm128" in two different positions.
1993          */
1994         if (is_class(REGISTER, instruction->oprs[i].type))
1995             continue;
1996
1997         /* This tests if xsizeflags[i] has more than one bit set */
1998         if ((xsizeflags[i] & (xsizeflags[i]-1)))
1999             goto done;          /* No luck */
2000
2001         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2002     }
2003
2004     /* Try matching again... */
2005     for (temp = nasm_instructions[instruction->opcode];
2006          temp->opcode != I_none; temp++) {
2007         m = matches(temp, instruction, bits);
2008         if (m == MOK_JUMP) {
2009             if (jmp_match(segment, offset, bits, instruction, temp->code))
2010                 m = MOK_GOOD;
2011             else
2012                 m = MERR_INVALOP;
2013         }
2014         if (m > merr)
2015             merr = m;
2016         if (merr == MOK_GOOD)
2017             goto done;
2018     }
2019
2020 done:
2021     *tempp = temp;
2022     return merr;
2023 }
2024
2025 static enum match_result matches(const struct itemplate *itemp,
2026                                  insn *instruction, int bits)
2027 {
2028     int i, size[MAX_OPERANDS], asize, oprs;
2029     bool opsizemissing = false;
2030
2031     /*
2032      * Check the opcode
2033      */
2034     if (itemp->opcode != instruction->opcode)
2035         return MERR_INVALOP;
2036
2037     /*
2038      * Count the operands
2039      */
2040     if (itemp->operands != instruction->operands)
2041         return MERR_INVALOP;
2042
2043     /*
2044      * Check that no spurious colons or TOs are present
2045      */
2046     for (i = 0; i < itemp->operands; i++)
2047         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2048             return MERR_INVALOP;
2049
2050     /*
2051      * Process size flags
2052      */
2053     switch (itemp->flags & IF_SMASK) {
2054     case IF_SB:
2055         asize = BITS8;
2056         break;
2057     case IF_SW:
2058         asize = BITS16;
2059         break;
2060     case IF_SD:
2061         asize = BITS32;
2062         break;
2063     case IF_SQ:
2064         asize = BITS64;
2065         break;
2066     case IF_SO:
2067         asize = BITS128;
2068         break;
2069     case IF_SY:
2070         asize = BITS256;
2071         break;
2072     case IF_SZ:
2073         switch (bits) {
2074         case 16:
2075             asize = BITS16;
2076             break;
2077         case 32:
2078             asize = BITS32;
2079             break;
2080         case 64:
2081             asize = BITS64;
2082             break;
2083         default:
2084             asize = 0;
2085             break;
2086         }
2087         break;
2088     default:
2089         asize = 0;
2090         break;
2091     }
2092
2093     if (itemp->flags & IF_ARMASK) {
2094         /* S- flags only apply to a specific operand */
2095         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2096         memset(size, 0, sizeof size);
2097         size[i] = asize;
2098     } else {
2099         /* S- flags apply to all operands */
2100         for (i = 0; i < MAX_OPERANDS; i++)
2101             size[i] = asize;
2102     }
2103
2104     /*
2105      * Check that the operand flags all match up,
2106      * it's a bit tricky so lets be verbose:
2107      *
2108      * 1) Find out the size of operand. If instruction
2109      *    doesn't have one specified -- we're trying to
2110      *    guess it either from template (IF_S* flag) or
2111      *    from code bits.
2112      *
2113      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2114      *    (ie the same operand as was specified somewhere in template, and
2115      *    this referred operand index is being achieved via ~SAME_AS)
2116      *    we are to be sure that both registers (in template and instruction)
2117      *    do exactly match.
2118      *
2119      * 3) If template operand do not match the instruction OR
2120      *    template has an operand size specified AND this size differ
2121      *    from which instruction has (perhaps we got it from code bits)
2122      *    we are:
2123      *      a)  Check that only size of instruction and operand is differ
2124      *          other characteristics do match
2125      *      b)  Perhaps it's a register specified in instruction so
2126      *          for such a case we just mark that operand as "size
2127      *          missing" and this will turn on fuzzy operand size
2128      *          logic facility (handled by a caller)
2129      */
2130     for (i = 0; i < itemp->operands; i++) {
2131         opflags_t type = instruction->oprs[i].type;
2132         if (!(type & SIZE_MASK))
2133             type |= size[i];
2134
2135         if (itemp->opd[i] & SAME_AS) {
2136             int j = itemp->opd[i] & ~SAME_AS;
2137             if (type != instruction->oprs[j].type ||
2138                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2139                 return MERR_INVALOP;
2140         } else if (itemp->opd[i] & ~type ||
2141             ((itemp->opd[i] & SIZE_MASK) &&
2142              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2143             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2144                 return MERR_INVALOP;
2145             } else if (!is_class(REGISTER, type)) {
2146                 /*
2147                  * Note: we don't honor extrinsic operand sizes for registers,
2148                  * so "missing operand size" for a register should be
2149                  * considered a wildcard match rather than an error.
2150                  */
2151                 opsizemissing = true;
2152             }
2153         }
2154     }
2155
2156     if (opsizemissing)
2157         return MERR_OPSIZEMISSING;
2158
2159     /*
2160      * Check operand sizes
2161      */
2162     if (itemp->flags & (IF_SM | IF_SM2)) {
2163         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2164         for (i = 0; i < oprs; i++) {
2165             asize = itemp->opd[i] & SIZE_MASK;
2166             if (asize) {
2167                 for (i = 0; i < oprs; i++)
2168                     size[i] = asize;
2169                 break;
2170             }
2171         }
2172     } else {
2173         oprs = itemp->operands;
2174     }
2175
2176     for (i = 0; i < itemp->operands; i++) {
2177         if (!(itemp->opd[i] & SIZE_MASK) &&
2178             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2179             return MERR_OPSIZEMISMATCH;
2180     }
2181
2182     /*
2183      * Check template is okay at the set cpu level
2184      */
2185     if (((itemp->flags & IF_PLEVEL) > cpu))
2186         return MERR_BADCPU;
2187
2188     /*
2189      * Verify the appropriate long mode flag.
2190      */
2191     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2192         return MERR_BADMODE;
2193
2194     /*
2195      * Check if special handling needed for Jumps
2196      */
2197     if ((itemp->code[0] & 0374) == 0370)
2198         return MOK_JUMP;
2199
2200     return MOK_GOOD;
2201 }
2202
/*
 * Work out the ModRM byte, the optional SIB byte and the displacement
 * width needed to encode the operand 'input', and store them in *output.
 *
 *   input    - register or memory operand to encode
 *   output   - receives modrm, sib, sib_present, bytes, rip and size.
 *              REX bits are OR-ed into output->rex and never cleared
 *              here, so the caller presumably has to initialise it.
 *   bits     - tested against 64 only: decides how a pure offset is
 *              encoded (presumably the current BITS setting)
 *   addrbits - address size in effect; compared against 16, 32 and 64
 *   rfield   - value for the reg field of ModRM (only the low three
 *              bits go into the byte; the full value feeds rexflags())
 *   rflags   - register flags belonging to rfield, used for REX bits
 *
 * Returns 'output' on success, or NULL when the operand cannot be
 * encoded as an effective address.  On NULL, output->rip and
 * output->rex have already been written.
 */
2203 static ea *process_ea(operand * input, ea * output, int bits,
2204                       int addrbits, int rfield, opflags_t rflags)
2205 {
         /*
          * True when the operand is flagged OPFLAG_UNKNOWN.  Such operands
          * never get the "no displacement" form below, and get the byte
          * displacement form only if EAF_BYTEOFFS asks for it.
          */
2206     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2207
2208     output->rip = false;
2209
2210     /* REX flags for the rfield operand */
2211     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2212
2213     if (is_class(REGISTER, input->type)) {  /* register direct */
2214         int i;
2215         opflags_t f;
2216
2217         if (input->basereg < EXPR_REG_START /* Verify as Register */
2218             || input->basereg >= REG_ENUM_LIMIT)
2219             return NULL;
2220         f = regflag(input);
2221         i = nasm_regvals[input->basereg];
2222
2223         if (REG_EA & ~f)
2224             return NULL;        /* Invalid EA register */
2225
2226         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2227
2228         output->sib_present = false;             /* no SIB necessary */
2229         output->bytes = 0;  /* no offset necessary either */
             /* 0xC0 sets mod = 3; reg = rfield, r/m = register number */
2230         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2231     } else {                    /* it's a memory reference */
2232         if (input->basereg == -1
2233             && (input->indexreg == -1 || input->scale == 0)) {
2234             /* it's a pure offset */
2235             if (bits == 64 && (~input->type & IP_REL)) {
                   /*
                    * 64-bit mode without IP_REL: mod = 0, r/m = 4 plus a
                    * SIB with index = 4 and base = 5, followed by a
                    * 4-byte displacement.
                    */
2236               int scale, index, base;
2237               output->sib_present = true;
2238               scale = 0;
2239               index = 4;
2240               base = 5;
2241               output->sib = (scale << 6) | (index << 3) | base;
2242               output->bytes = 4;
2243               output->modrm = 4 | ((rfield & 7) << 3);
2244               output->rip = false;
2245             } else {
                   /*
                    * Displacement-only ModRM: r/m = 6 with 2 bytes for
                    * 16-bit addressing, r/m = 5 with 4 bytes otherwise.
                    * With bits == 64 the result is marked RIP-relative.
                    */
2246               output->sib_present = false;
2247               output->bytes = (addrbits != 16 ? 4 : 2);
2248               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2249               output->rip = bits == 64;
2250             }
2251         } else {                /* it's an indirection */
2252             int i = input->indexreg, b = input->basereg, s = input->scale;
2253             int32_t o = input->offset, seg = input->segment;
2254             int hb = input->hintbase, ht = input->hinttype;
2255             int t, it, bt;              /* register numbers */
2256             opflags_t x, ix, bx;        /* register flags */
2257
2258             if (s == 0)
2259                 i = -1;         /* make this easy, at least */
2260
                 /* it/ix: index register number and flags (-1/0 if none) */
2261             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2262                 it = nasm_regvals[i];
2263                 ix = nasm_reg_flags[i];
2264             } else {
2265                 it = -1;
2266                 ix = 0;
2267             }
2268
                 /* bt/bx: base register number and flags (-1/0 if none) */
2269             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2270                 bt = nasm_regvals[b];
2271                 bx = nasm_reg_flags[b];
2272             } else {
2273                 bt = -1;
2274                 bx = 0;
2275             }
2276
2277             /* check for a 32/64-bit memory reference... */
2278             if ((ix|bx) & (BITS32|BITS64)) {
2279                 /* it must be a 32/64-bit memory reference. Firstly we have
2280                  * to check that all registers involved are type E/Rxx. */
                     /* sok: the address sizes the registers still allow */
2281                 int32_t sok = BITS32|BITS64;
2282
2283                 if (it != -1) {
2284                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2285                         sok &= ix;
2286                     else
2287                         return NULL;
2288                 }
2289
2290                 if (bt != -1) {
2291                     if (REG_GPR & ~bx)
2292                         return NULL; /* Invalid register */
2293                     if (~sok & bx & SIZE_MASK)
2294                         return NULL; /* Invalid size */
2295                     sok &= bx;
2296                 }
2297
2298                 /* While we're here, ensure the user didn't specify
2299                    WORD or QWORD. */
2300                 if (input->disp_size == 16 || input->disp_size == 64)
2301                     return NULL;
2302
                     /* the address size in effect must be one the registers allow */
2303                 if (addrbits == 16 ||
2304                     (addrbits == 32 && !(sok & BITS32)) ||
2305                     (addrbits == 64 && !(sok & BITS64)))
2306                     return NULL;
2307
2308                 /* now reorganize base/index */
2309                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2310                     ((hb == b && ht == EAH_NOTBASE)
2311                      || (hb == i && ht == EAH_MAKEBASE))) {
2312                     /* swap if hints say so */
2313                     t = bt, bt = it, it = t;
2314                     x = bx, bx = ix, ix = x;
2315                 }
2316                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2317                     bt = -1, bx = 0, s++;
2318                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2319                     /* make single reg base, unless hint */
2320                     bt = it, bx = ix, it = -1, ix = 0;
2321                 }
2322                 if (((s == 2 && it != REG_NUM_ESP
2323                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2324                      || s == 5 || s == 9) && bt == -1)
2325                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
                     /*
                      * EAF_TIMESTWO with a lone base: move it into the
                      * index slot with scale 1 (the comment following
                      * this statement describes it).
                      */
2326                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2327                     && (input->eaflags & EAF_TIMESTWO))
2328                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2329                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2330                 if (s == 1 && it == REG_NUM_ESP) {
2331                     /* swap ESP into base if scale is 1 */
2332                     t = it, it = bt, bt = t;
2333                     x = ix, ix = bx, bx = x;
2334                 }
2335                 if (it == REG_NUM_ESP
2336                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2337                     return NULL;        /* wrong, for various reasons */
2338
2339                 output->rex |= rexflags(it, ix, REX_X);
2340                 output->rex |= rexflags(bt, bx, REX_B);
2341
2342                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2343                     /* no SIB needed */
2344                     int mod, rm;
2345
2346                     if (bt == -1) {
2347                         rm = 5;
2348                         mod = 0;
2349                     } else {
2350                         rm = (bt & 7);
                             /*
                              * mod 0: no displacement, mod 1: one byte,
                              * mod 2: four bytes (see output->bytes below).
                              * mod 0 is never chosen when rm is REG_NUM_EBP.
                              */
2351                         if (rm != REG_NUM_EBP && o == 0 &&
2352                                 seg == NO_SEG && !forw_ref &&
2353                                 !(input->eaflags &
2354                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2355                             mod = 0;
2356                         else if (input->eaflags & EAF_BYTEOFFS ||
2357                                  (o >= -128 && o <= 127 && seg == NO_SEG
2358                                   && !forw_ref
2359                                   && !(input->eaflags & EAF_WORDOFFS)))
2360                             mod = 1;
2361                         else
2362                             mod = 2;
2363                     }
2364
2365                     output->sib_present = false;
                         /* 4 bytes without a base or for mod 2, else mod (0 or 1) */
2366                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2367                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2368                 } else {
2369                     /* we need a SIB */
2370                     int mod, scale, index, base;
2371
                         /* with no index register the index field is 4 */
2372                     if (it == -1)
2373                         index = 4, s = 1;
2374                     else
2375                         index = (it & 7);
2376
                         /* scale field = log2 of the scale factor */
2377                     switch (s) {
2378                     case 1:
2379                         scale = 0;
2380                         break;
2381                     case 2:
2382                         scale = 1;
2383                         break;
2384                     case 4:
2385                         scale = 2;
2386                         break;
2387                     case 8:
2388                         scale = 3;
2389                         break;
2390                     default:   /* then what the smeg is it? */
2391                         return NULL;    /* panic */
2392                     }
2393
2394                     if (bt == -1) {
2395                         base = 5;
2396                         mod = 0;
2397                     } else {
2398                         base = (bt & 7);
                             /* same mod selection as the no-SIB case above */
2399                         if (base != REG_NUM_EBP && o == 0 &&
2400                                     seg == NO_SEG && !forw_ref &&
2401                                     !(input->eaflags &
2402                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2403                             mod = 0;
2404                         else if (input->eaflags & EAF_BYTEOFFS ||
2405                                  (o >= -128 && o <= 127 && seg == NO_SEG
2406                                   && !forw_ref
2407                                   && !(input->eaflags & EAF_WORDOFFS)))
2408                             mod = 1;
2409                         else
2410                             mod = 2;
2411                     }
2412
2413                     output->sib_present = true;
2414                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
                         /* r/m = 4 signals that a SIB byte follows */
2415                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2416                     output->sib = (scale << 6) | (index << 3) | base;
2417                 }
2418             } else {            /* it's 16-bit */
2419                 int mod, rm;
2420
2421                 /* check for 64-bit long mode */
2422                 if (addrbits == 64)
2423                     return NULL;
2424
2425                 /* check all registers are BX, BP, SI or DI */
2426                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2427                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2428                                        && i != R_SI && i != R_DI))
2429                     return NULL;
2430
2431                 /* ensure the user didn't specify DWORD/QWORD */
2432                 if (input->disp_size == 32 || input->disp_size == 64)
2433                     return NULL;
2434
2435                 if (s != 1 && i != -1)
2436                     return NULL;        /* no can do, in 16-bit EA */
2437                 if (b == -1 && i != -1) {
2438                     int tmp = b;
2439                     b = i;
2440                     i = tmp;
2441                 }               /* swap */
2442                 if ((b == R_SI || b == R_DI) && i != -1) {
2443                     int tmp = b;
2444                     b = i;
2445                     i = tmp;
2446                 }
2447                 /* have BX/BP as base, SI/DI index */
2448                 if (b == i)
2449                     return NULL;        /* shouldn't ever happen, in theory */
2450                 if (i != -1 && b != -1 &&
2451                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2452                     return NULL;        /* invalid combinations */
2453                 if (b == -1)    /* pure offset: handled above */
2454                     return NULL;        /* so if it gets to here, panic! */
2455
                     /* map the (index, base) pair, or the lone base, to r/m */
2456                 rm = -1;
2457                 if (i != -1)
2458                     switch (i * 256 + b) {
2459                     case R_SI * 256 + R_BX:
2460                         rm = 0;
2461                         break;
2462                     case R_DI * 256 + R_BX:
2463                         rm = 1;
2464                         break;
2465                     case R_SI * 256 + R_BP:
2466                         rm = 2;
2467                         break;
2468                     case R_DI * 256 + R_BP:
2469                         rm = 3;
2470                         break;
2471                 } else
2472                     switch (b) {
2473                     case R_SI:
2474                         rm = 4;
2475                         break;
2476                     case R_DI:
2477                         rm = 5;
2478                         break;
2479                     case R_BP:
2480                         rm = 6;
2481                         break;
2482                     case R_BX:
2483                         rm = 7;
2484                         break;
2485                     }
2486                 if (rm == -1)   /* can't happen, in theory */
2487                     return NULL;        /* so panic if it does */
2488
                     /*
                      * mod doubles as the displacement size in bytes here
                      * (0, 1 or 2); mod 0 is never chosen when rm is 6.
                      */
2489                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2490                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2491                     mod = 0;
2492                 else if (input->eaflags & EAF_BYTEOFFS ||
2493                          (o >= -128 && o <= 127 && seg == NO_SEG
2494                           && !forw_ref
2495                           && !(input->eaflags & EAF_WORDOFFS)))
2496                     mod = 1;
2497                 else
2498                     mod = 2;
2499
2500                 output->sib_present = false;    /* no SIB - it's 16-bit */
2501                 output->bytes = mod;    /* bytes of offset needed */
2502                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2503             }
2504         }
2505     }
2506
         /* total: ModRM byte + optional SIB byte + displacement bytes */
2507     output->size = 1 + output->sib_present + output->bytes;
2508     return output;
2509 }
2510
2511 static void add_asp(insn *ins, int addrbits)
2512 {
2513     int j, valid;
2514     int defdisp;
2515
2516     valid = (addrbits == 64) ? 64|32 : 32|16;
2517
2518     switch (ins->prefixes[PPS_ASIZE]) {
2519     case P_A16:
2520         valid &= 16;
2521         break;
2522     case P_A32:
2523         valid &= 32;
2524         break;
2525     case P_A64:
2526         valid &= 64;
2527         break;
2528     case P_ASP:
2529         valid &= (addrbits == 32) ? 16 : 32;
2530         break;
2531     default:
2532         break;
2533     }
2534
2535     for (j = 0; j < ins->operands; j++) {
2536         if (is_class(MEMORY, ins->oprs[j].type)) {
2537             opflags_t i, b;
2538
2539             /* Verify as Register */
2540             if (ins->oprs[j].indexreg < EXPR_REG_START
2541                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2542                 i = 0;
2543             else
2544                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2545
2546             /* Verify as Register */
2547             if (ins->oprs[j].basereg < EXPR_REG_START
2548                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2549                 b = 0;
2550             else
2551                 b = nasm_reg_flags[ins->oprs[j].basereg];
2552
2553             if (ins->oprs[j].scale == 0)
2554                 i = 0;
2555
2556             if (!i && !b) {
2557                 int ds = ins->oprs[j].disp_size;
2558                 if ((addrbits != 64 && ds > 8) ||
2559                     (addrbits == 64 && ds == 16))
2560                     valid &= ds;
2561             } else {
2562                 if (!(REG16 & ~b))
2563                     valid &= 16;
2564                 if (!(REG32 & ~b))
2565                     valid &= 32;
2566                 if (!(REG64 & ~b))
2567                     valid &= 64;
2568
2569                 if (!(REG16 & ~i))
2570                     valid &= 16;
2571                 if (!(REG32 & ~i))
2572                     valid &= 32;
2573                 if (!(REG64 & ~i))
2574                     valid &= 64;
2575             }
2576         }
2577     }
2578
2579     if (valid & addrbits) {
2580         ins->addr_size = addrbits;
2581     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2582         /* Add an address size prefix */
2583         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2584         ins->prefixes[PPS_ASIZE] = pref;
2585         ins->addr_size = (addrbits == 32) ? 16 : 32;
2586     } else {
2587         /* Impossible... */
2588         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2589         ins->addr_size = addrbits; /* Error recovery */
2590     }
2591
2592     defdisp = ins->addr_size == 16 ? 16 : 32;
2593
2594     for (j = 0; j < ins->operands; j++) {
2595         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2596             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2597             != ins->addr_size) {
2598             /* mem_offs sizes must match the address size; if not,
2599                strip the MEM_OFFS bit and match only EA instructions */
2600             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2601         }
2602     }
2603 }