assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 0ww lpp
  96  *                 [w0] ww = 0 for W = 0
  97  *                 [w1] ww = 1 for W = 1
  98  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  99  *                 [ww] ww = 3 for W used as REX.W
 100  *
 101  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 102  *
 103  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 104  *                 which is to be extended to the operand size.
 105  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 106  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 107  * \312          - (disassembler only) invalid with non-default address size.
 108  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 109  * \314          - (disassembler only) invalid with REX.B
 110  * \315          - (disassembler only) invalid with REX.X
 111  * \316          - (disassembler only) invalid with REX.R
 112  * \317          - (disassembler only) invalid with REX.W
 113  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 114  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 115  * \322          - indicates that this instruction is only valid when the
 116  *                 operand size is the default (instruction to disassembler,
 117  *                 generates no code in the assembler)
 118  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 119  * \324          - indicates 64-bit operand size requiring REX prefix.
 120  * \325          - instruction which always uses spl/bpl/sil/dil
 121  * \330          - a literal byte follows in the code stream, to be added
 122  *                 to the condition code value of the instruction.
 123  * \331          - instruction not valid with REP prefix.  Hint for
 124  *                 disassembler only; for SSE instructions.
 125  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 126  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 127  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 128  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  129  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  130  * \337          - force a REPNE prefix (0xF2) even if not specified.
 131  *                 \336-\337 are still listed as prefixes in the disassembler.
 132  * \340          - reserve <operand 0> bytes of uninitialized storage.
 133  *                 Operand 0 had better be a segmentless constant.
 134  * \341          - this instruction needs a WAIT "prefix"
 135  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 136  *                 (POP is never used for CS) depending on operand 0
 137  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 138  *                 on operand 0
 139  * \360          - no SSE prefix (== \364\331)
 140  * \361          - 66 SSE prefix (== \366\331)
 141  * \362          - F2 SSE prefix (== \364\332)
 142  * \363          - F3 SSE prefix (== \364\333)
 143  * \364          - operand-size prefix (0x66) not permitted
 144  * \365          - address-size prefix (0x67) not permitted
 145  * \366          - operand-size prefix (0x66) used as opcode extension
 146  * \367          - address-size prefix (0x67) used as opcode extension
 147  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 148  *                 370 is used for Jcc, 371 is used for JMP.
 149  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 150  *                 used for conditional jump over longer jump
 151  */
 152
 153 #include "compiler.h"
 154
 155 #include <stdio.h>
 156 #include <string.h>
 157 #include <inttypes.h>
 158
 159 #include "nasm.h"
 160 #include "nasmlib.h"
 161 #include "assemble.h"
 162 #include "insns.h"
 163 #include "tables.h"
 164
 165 enum match_result {
 166     /*
 167      * Matching errors.  These should be sorted so that more specific
 168      * errors come later in the sequence.
 169      */
 170     MERR_INVALOP,
 171     MERR_OPSIZEMISSING,
 172     MERR_OPSIZEMISMATCH,
 173     MERR_BADCPU,
 174     MERR_BADMODE,
 175     /*
 176      * Matching success; the conditional ones first
 177      */
 178     MOK_JUMP,                   /* Matching OK but needs jmp_match() */
 179     MOK_GOOD                    /* Matching unconditionally OK */
 180 };
 181
 182 typedef struct {
 183     int sib_present;                 /* is a SIB byte necessary? */
 184     int bytes;                       /* # of bytes of offset needed */
 185     int size;                        /* lazy - this is sib+bytes+1 */
 186     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 187 } ea;
 188
 189 static uint32_t cpu;            /* cpu level received from nasm.c */
 190 static efunc errfunc;
 191 static struct ofmt *outfmt;
 192 static ListGen *list;
 193
 194 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 195 static void gencode(int32_t segment, int64_t offset, int bits,
 196                     insn * ins, const struct itemplate *temp,
 197                     int64_t insn_end);
 198 static enum match_result find_match(const struct itemplate **tempp,
 199                                     insn *instruction,
 200                                     int32_t segment, int64_t offset, int bits);
 201 static enum match_result matches(const struct itemplate *, insn *, int bits);
 202 static opflags_t regflag(const operand *);
 203 static int32_t regval(const operand *);
 204 static int rexflags(int, opflags_t, int);
 205 static int op_rexflags(const operand *, int);
 206 static ea *process_ea(operand *, ea *, int, int, int, opflags_t);
 207 static void add_asp(insn *, int);
 208
 209 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 210 {
 211     return ins->prefixes[pos] == prefix;
 212 }
 213
 214 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 215 {
 216     if (ins->prefixes[pos])
 217         errfunc(ERR_NONFATAL, "invalid %s prefix",
 218                 prefix_name(ins->prefixes[pos]));
 219 }
 220
 221 static const char *size_name(int size)
 222 {
 223     switch (size) {
 224     case 1:
 225         return "byte";
 226     case 2:
 227         return "word";
 228     case 4:
 229         return "dword";
 230     case 8:
 231         return "qword";
 232     case 10:
 233         return "tword";
 234     case 16:
 235         return "oword";
 236     case 32:
 237         return "yword";
 238     default:
 239         return "???";
 240     }
 241 }
 242
 243 static void warn_overflow(int pass, int size)
 244 {
 245     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 246             "%s data exceeds bounds", size_name(size));
 247 }
 248
 249 static void warn_overflow_const(int64_t data, int size)
 250 {
 251     if (overflow_general(data, size))
 252         warn_overflow(ERR_PASS1, size);
 253 }
 254
 255 static void warn_overflow_opd(const struct operand *o, int size)
 256 {
 257     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 258         if (overflow_general(o->offset, size))
 259             warn_overflow(ERR_PASS2, size);
 260     }
 261 }
 262
 263 /*
 264  * This routine wrappers the real output format's output routine,
 265  * in order to pass a copy of the data off to the listing file
 266  * generator at the same time.
 267  */
 268 static void out(int64_t offset, int32_t segto, const void *data,
 269                 enum out_type type, uint64_t size,
 270                 int32_t segment, int32_t wrt)
 271 {
 272     static int32_t lineno = 0;     /* static!!! */
 273     static char *lnfname = NULL;
 274     uint8_t p[8];
 275
 276     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 277         /*
 278          * This is a non-relocated address, and we're going to
 279          * convert it into RAWDATA format.
 280          */
 281         uint8_t *q = p;
 282
 283         if (size > 8) {
 284             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 285             return;
 286         }
 287
 288         WRITEADDR(q, *(int64_t *)data, size);
 289         data = p;
 290         type = OUT_RAWDATA;
 291     }
 292
 293     list->output(offset, data, type, size);
 294
 295     /*
 296      * this call to src_get determines when we call the
 297      * debug-format-specific "linenum" function
 298      * it updates lineno and lnfname to the current values
 299      * returning 0 if "same as last time", -2 if lnfname
 300      * changed, and the amount by which lineno changed,
 301      * if it did. thus, these variables must be static
 302      */
 303
 304     if (src_get(&lineno, &lnfname)) {
 305         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 306     }
 307
 308     outfmt->output(segto, data, type, size, segment, wrt);
 309 }
 310
 311 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 312                      insn * ins, const uint8_t *code)
 313 {
 314     int64_t isize;
 315     uint8_t c = code[0];
 316
 317     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 318         return false;
 319     if (!optimizing)
 320         return false;
 321     if (optimizing < 0 && c == 0371)
 322         return false;
 323
 324     isize = calcsize(segment, offset, bits, ins, code);
 325
 326     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 327         /* Be optimistic in pass 1 */
 328         return true;
 329
 330     if (ins->oprs[0].segment != segment)
 331         return false;
 332
 333     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 334     return (isize >= -128 && isize <= 127); /* is it byte size? */
 335 }
 336
 337 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 338                  insn * instruction, struct ofmt *output, efunc error,
 339                  ListGen * listgen)
 340 {
 341     const struct itemplate *temp;
 342     int j;
 343     enum match_result m;
 344     int64_t insn_end;
 345     int32_t itimes;
 346     int64_t start = offset;
 347     int64_t wsize;              /* size for DB etc. */
 348
 349     errfunc = error;            /* to pass to other functions */
 350     cpu = cp;
 351     outfmt = output;            /* likewise */
 352     list = listgen;             /* and again */
 353
 354     wsize = idata_bytes(instruction->opcode);
 355     if (wsize == -1)
 356         return 0;
 357
 358     if (wsize) {
 359         extop *e;
 360         int32_t t = instruction->times;
 361         if (t < 0)
 362             errfunc(ERR_PANIC,
 363                     "instruction->times < 0 (%ld) in assemble()", t);
 364
 365         while (t--) {           /* repeat TIMES times */
 366             list_for_each(e, instruction->eops) {
 367                 if (e->type == EOT_DB_NUMBER) {
 368                     if (wsize == 1) {
 369                         if (e->segment != NO_SEG)
 370                             errfunc(ERR_NONFATAL,
 371                                     "one-byte relocation attempted");
 372                         else {
 373                             uint8_t out_byte = e->offset;
 374                             out(offset, segment, &out_byte,
 375                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 376                         }
 377                     } else if (wsize > 8) {
 378                         errfunc(ERR_NONFATAL,
 379                                 "integer supplied to a DT, DO or DY"
 380                                 " instruction");
 381                     } else
 382                         out(offset, segment, &e->offset,
 383                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 384                     offset += wsize;
 385                 } else if (e->type == EOT_DB_STRING ||
 386                            e->type == EOT_DB_STRING_FREE) {
 387                     int align;
 388
 389                     out(offset, segment, e->stringval,
 390                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 391                     align = e->stringlen % wsize;
 392
 393                     if (align) {
 394                         align = wsize - align;
 395                         out(offset, segment, zero_buffer,
 396                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 397                     }
 398                     offset += e->stringlen + align;
 399                 }
 400             }
 401             if (t > 0 && t == instruction->times - 1) {
 402                 /*
 403                  * Dummy call to list->output to give the offset to the
 404                  * listing module.
 405                  */
 406                 list->output(offset, NULL, OUT_RAWDATA, 0);
 407                 list->uplevel(LIST_TIMES);
 408             }
 409         }
 410         if (instruction->times > 1)
 411             list->downlevel(LIST_TIMES);
 412         return offset - start;
 413     }
 414
 415     if (instruction->opcode == I_INCBIN) {
 416         const char *fname = instruction->eops->stringval;
 417         FILE *fp;
 418
 419         fp = fopen(fname, "rb");
 420         if (!fp) {
 421             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 422                   fname);
 423         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 424             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 425                   fname);
 426         } else {
 427             static char buf[4096];
 428             size_t t = instruction->times;
 429             size_t base = 0;
 430             size_t len;
 431
 432             len = ftell(fp);
 433             if (instruction->eops->next) {
 434                 base = instruction->eops->next->offset;
 435                 len -= base;
 436                 if (instruction->eops->next->next &&
 437                     len > (size_t)instruction->eops->next->next->offset)
 438                     len = (size_t)instruction->eops->next->next->offset;
 439             }
 440             /*
 441              * Dummy call to list->output to give the offset to the
 442              * listing module.
 443              */
 444             list->output(offset, NULL, OUT_RAWDATA, 0);
 445             list->uplevel(LIST_INCBIN);
 446             while (t--) {
 447                 size_t l;
 448
 449                 fseek(fp, base, SEEK_SET);
 450                 l = len;
 451                 while (l > 0) {
 452                     int32_t m;
 453                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 454                     if (!m) {
 455                         /*
 456                          * This shouldn't happen unless the file
 457                          * actually changes while we are reading
 458                          * it.
 459                          */
 460                         error(ERR_NONFATAL,
 461                               "`incbin': unexpected EOF while"
 462                               " reading file `%s'", fname);
 463                         t = 0;  /* Try to exit cleanly */
 464                         break;
 465                     }
 466                     out(offset, segment, buf, OUT_RAWDATA, m,
 467                         NO_SEG, NO_SEG);
 468                     l -= m;
 469                 }
 470             }
 471             list->downlevel(LIST_INCBIN);
 472             if (instruction->times > 1) {
 473                 /*
 474                  * Dummy call to list->output to give the offset to the
 475                  * listing module.
 476                  */
 477                 list->output(offset, NULL, OUT_RAWDATA, 0);
 478                 list->uplevel(LIST_TIMES);
 479                 list->downlevel(LIST_TIMES);
 480             }
 481             fclose(fp);
 482             return instruction->times * len;
 483         }
 484         return 0;               /* if we're here, there's an error */
 485     }
 486
 487     /* Check to see if we need an address-size prefix */
 488     add_asp(instruction, bits);
 489
 490     m = find_match(&temp, instruction, segment, offset, bits);
 491
 492     if (m == MOK_GOOD) {
 493         /* Matches! */
 494         int64_t insn_size = calcsize(segment, offset, bits,
 495                                      instruction, temp->code);
 496         itimes = instruction->times;
 497         if (insn_size < 0)  /* shouldn't be, on pass two */
 498             error(ERR_PANIC, "errors made it through from pass one");
 499         else
 500             while (itimes--) {
 501                 for (j = 0; j < MAXPREFIX; j++) {
 502                     uint8_t c = 0;
 503                     switch (instruction->prefixes[j]) {
 504                     case P_WAIT:
 505                         c = 0x9B;
 506                         break;
 507                     case P_LOCK:
 508                         c = 0xF0;
 509                         break;
 510                     case P_REPNE:
 511                     case P_REPNZ:
 512                         c = 0xF2;
 513                         break;
 514                     case P_REPE:
 515                     case P_REPZ:
 516                     case P_REP:
 517                         c = 0xF3;
 518                         break;
 519                     case R_CS:
 520                         if (bits == 64) {
 521                             error(ERR_WARNING | ERR_PASS2,
 522                                   "cs segment base generated, but will be ignored in 64-bit mode");
 523                         }
 524                         c = 0x2E;
 525                         break;
 526                     case R_DS:
 527                         if (bits == 64) {
 528                             error(ERR_WARNING | ERR_PASS2,
 529                                   "ds segment base generated, but will be ignored in 64-bit mode");
 530                         }
 531                         c = 0x3E;
 532                         break;
 533                     case R_ES:
 534                         if (bits == 64) {
 535                             error(ERR_WARNING | ERR_PASS2,
 536                                   "es segment base generated, but will be ignored in 64-bit mode");
 537                         }
 538                         c = 0x26;
 539                         break;
 540                     case R_FS:
 541                         c = 0x64;
 542                         break;
 543                     case R_GS:
 544                         c = 0x65;
 545                         break;
 546                     case R_SS:
 547                         if (bits == 64) {
 548                             error(ERR_WARNING | ERR_PASS2,
 549                                   "ss segment base generated, but will be ignored in 64-bit mode");
 550                         }
 551                         c = 0x36;
 552                         break;
 553                     case R_SEGR6:
 554                     case R_SEGR7:
 555                         error(ERR_NONFATAL,
 556                               "segr6 and segr7 cannot be used as prefixes");
 557                         break;
 558                     case P_A16:
 559                         if (bits == 64) {
 560                             error(ERR_NONFATAL,
 561                                   "16-bit addressing is not supported "
 562                                   "in 64-bit mode");
 563                         } else if (bits != 16)
 564                             c = 0x67;
 565                         break;
 566                     case P_A32:
 567                         if (bits != 32)
 568                             c = 0x67;
 569                         break;
 570                     case P_A64:
 571                         if (bits != 64) {
 572                             error(ERR_NONFATAL,
 573                                   "64-bit addressing is only supported "
 574                                   "in 64-bit mode");
 575                         }
 576                         break;
 577                     case P_ASP:
 578                         c = 0x67;
 579                         break;
 580                     case P_O16:
 581                         if (bits != 16)
 582                             c = 0x66;
 583                         break;
 584                     case P_O32:
 585                         if (bits == 16)
 586                             c = 0x66;
 587                         break;
 588                     case P_O64:
 589                         /* REX.W */
 590                         break;
 591                     case P_OSP:
 592                         c = 0x66;
 593                         break;
 594                     case P_none:
 595                         break;
 596                     default:
 597                         error(ERR_PANIC, "invalid instruction prefix");
 598                     }
 599                     if (c != 0) {
 600                         out(offset, segment, &c, OUT_RAWDATA, 1,
 601                             NO_SEG, NO_SEG);
 602                         offset++;
 603                     }
 604                 }
 605                 insn_end = offset + insn_size;
 606                 gencode(segment, offset, bits, instruction,
 607                         temp, insn_end);
 608                 offset += insn_size;
 609                 if (itimes > 0 && itimes == instruction->times - 1) {
 610                     /*
 611                      * Dummy call to list->output to give the offset to the
 612                      * listing module.
 613                      */
 614                     list->output(offset, NULL, OUT_RAWDATA, 0);
 615                     list->uplevel(LIST_TIMES);
 616                 }
 617             }
 618         if (instruction->times > 1)
 619             list->downlevel(LIST_TIMES);
 620         return offset - start;
 621     } else {
 622         /* No match */
 623         switch (m) {
 624         case MERR_OPSIZEMISSING:
 625             error(ERR_NONFATAL, "operation size not specified");
 626             break;
 627         case MERR_OPSIZEMISMATCH:
 628             error(ERR_NONFATAL, "mismatch in operand sizes");
 629             break;
 630         case MERR_BADCPU:
 631             error(ERR_NONFATAL, "no instruction for this cpu level");
 632             break;
 633         case MERR_BADMODE:
 634             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 635                   bits);
 636             break;
 637         default:
 638             error(ERR_NONFATAL,
 639                   "invalid combination of opcode and operands");
 640             break;
 641         }
 642     }
 643     return 0;
 644 }
 645
 646 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 647                   insn * instruction, efunc error)
 648 {
 649     const struct itemplate *temp;
 650     enum match_result m;
 651
 652     errfunc = error;            /* to pass to other functions */
 653     cpu = cp;
 654
 655     if (instruction->opcode == I_none)
 656         return 0;
 657
 658     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 659         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 660         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 661         instruction->opcode == I_DY) {
 662         extop *e;
 663         int32_t isize, osize, wsize;
 664
 665         isize = 0;
 666         wsize = idata_bytes(instruction->opcode);
 667
 668         list_for_each(e, instruction->eops) {
 669             int32_t align;
 670
 671             osize = 0;
 672             if (e->type == EOT_DB_NUMBER) {
 673                 osize = 1;
 674                 warn_overflow_const(e->offset, wsize);
 675             } else if (e->type == EOT_DB_STRING ||
 676                        e->type == EOT_DB_STRING_FREE)
 677                 osize = e->stringlen;
 678
 679             align = (-osize) % wsize;
 680             if (align < 0)
 681                 align += wsize;
 682             isize += osize + align;
 683         }
 684         return isize * instruction->times;
 685     }
 686
 687     if (instruction->opcode == I_INCBIN) {
 688         const char *fname = instruction->eops->stringval;
 689         FILE *fp;
 690         size_t len;
 691
 692         fp = fopen(fname, "rb");
 693         if (!fp)
 694             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 695                   fname);
 696         else if (fseek(fp, 0L, SEEK_END) < 0)
 697             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 698                   fname);
 699         else {
 700             len = ftell(fp);
 701             fclose(fp);
 702             if (instruction->eops->next) {
 703                 len -= instruction->eops->next->offset;
 704                 if (instruction->eops->next->next &&
 705                     len > (size_t)instruction->eops->next->next->offset) {
 706                     len = (size_t)instruction->eops->next->next->offset;
 707                 }
 708             }
 709             return instruction->times * len;
 710         }
 711         return 0;               /* if we're here, there's an error */
 712     }
 713
 714     /* Check to see if we need an address-size prefix */
 715     add_asp(instruction, bits);
 716
 717     m = find_match(&temp, instruction, segment, offset, bits);
 718     if (m == MOK_GOOD) {
 719         /* we've matched an instruction. */
 720         int64_t isize;
 721         const uint8_t *codes = temp->code;
 722         int j;
 723
 724         isize = calcsize(segment, offset, bits, instruction, codes);
 725         if (isize < 0)
 726             return -1;
 727         for (j = 0; j < MAXPREFIX; j++) {
 728             switch (instruction->prefixes[j]) {
 729             case P_A16:
 730                 if (bits != 16)
 731                     isize++;
 732                 break;
 733             case P_A32:
 734                 if (bits != 32)
 735                     isize++;
 736                 break;
 737             case P_O16:
 738                 if (bits != 16)
 739                     isize++;
 740                 break;
 741             case P_O32:
 742                 if (bits == 16)
 743                     isize++;
 744                 break;
 745             case P_A64:
 746             case P_O64:
 747             case P_none:
 748                 break;
 749             default:
 750                 isize++;
 751                 break;
 752             }
 753         }
 754         return isize * instruction->times;
 755     } else {
 756         return -1;                  /* didn't match any instruction */
 757     }
 758 }
 759
 760 static bool possible_sbyte(operand *o)
 761 {
 762     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 763         !(o->opflags & OPFLAG_UNKNOWN) &&
 764         optimizing >= 0 && !(o->type & STRICT);
 765 }
 766
 767 /* check that opn[op]  is a signed byte of size 16 or 32 */
 768 static bool is_sbyte16(operand *o)
 769 {
 770     int16_t v;
 771
 772     if (!possible_sbyte(o))
 773         return false;
 774
 775     v = o->offset;
 776     return v >= -128 && v <= 127;
 777 }
 778
 779 static bool is_sbyte32(operand *o)
 780 {
 781     int32_t v;
 782
 783     if (!possible_sbyte(o))
 784         return false;
 785
 786     v = o->offset;
 787     return v >= -128 && v <= 127;
 788 }
 789
 790 /* Common construct */
 791 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 792
 793 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 794                         insn * ins, const uint8_t *codes)
 795 {
 796     int64_t length = 0;
 797     uint8_t c;
 798     int rex_mask = ~0;
 799     int op1, op2;
 800     struct operand *opx;
 801     uint8_t opex = 0;
 802
 803     ins->rex = 0;               /* Ensure REX is reset */
 804
 805     if (ins->prefixes[PPS_OSIZE] == P_O64)
 806         ins->rex |= REX_W;
 807
 808     (void)segment;              /* Don't warn that this parameter is unused */
 809     (void)offset;               /* Don't warn that this parameter is unused */
 810
 811     while (*codes) {
 812         c = *codes++;
 813         op1 = (c & 3) + ((opex & 1) << 2);
 814         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 815         opx = &ins->oprs[op1];
 816         opex = 0;               /* For the next iteration */
 817
 818         switch (c) {
 819         case 01:
 820         case 02:
 821         case 03:
 822         case 04:
 823             codes += c, length += c;
 824             break;
 825
 826         case 05:
 827         case 06:
 828         case 07:
 829             opex = c;
 830             break;
 831
 832         case4(010):
 833             ins->rex |=
 834                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 835             codes++, length++;
 836             break;
 837
 838         case4(014):
 839         case4(020):
 840         case4(024):
 841             length++;
 842             break;
 843
 844         case4(030):
 845             length += 2;
 846             break;
 847
 848         case4(034):
 849             if (opx->type & (BITS16 | BITS32 | BITS64))
 850                 length += (opx->type & BITS16) ? 2 : 4;
 851             else
 852                 length += (bits == 16) ? 2 : 4;
 853             break;
 854
 855         case4(040):
 856             length += 4;
 857             break;
 858
 859         case4(044):
 860             length += ins->addr_size >> 3;
 861             break;
 862
 863         case4(050):
 864             length++;
 865             break;
 866
 867         case4(054):
 868             length += 8; /* MOV reg64/imm */
 869             break;
 870
 871         case4(060):
 872             length += 2;
 873             break;
 874
 875         case4(064):
 876             if (opx->type & (BITS16 | BITS32 | BITS64))
 877                 length += (opx->type & BITS16) ? 2 : 4;
 878             else
 879                 length += (bits == 16) ? 2 : 4;
 880             break;
 881
 882         case4(070):
 883             length += 4;
 884             break;
 885
 886         case4(074):
 887             length += 2;
 888             break;
 889
 890         case4(0140):
 891             length += is_sbyte16(opx) ? 1 : 2;
 892             break;
 893
 894         case4(0144):
 895             codes++;
 896             length++;
 897             break;
 898
 899         case4(0150):
 900             length += is_sbyte32(opx) ? 1 : 4;
 901             break;
 902
 903         case4(0154):
 904             codes++;
 905             length++;
 906             break;
 907
 908         case4(0160):
 909             length++;
 910             ins->rex |= REX_D;
 911             ins->drexdst = regval(opx);
 912             break;
 913
 914         case4(0164):
 915             length++;
 916             ins->rex |= REX_D|REX_OC;
 917             ins->drexdst = regval(opx);
 918             break;
 919
 920         case 0171:
 921             break;
 922
 923         case 0172:
 924         case 0173:
 925         case 0174:
 926             codes++;
 927             length++;
 928             break;
 929
 930         case4(0250):
 931             length += is_sbyte32(opx) ? 1 : 4;
 932             break;
 933
 934         case4(0254):
 935             length += 4;
 936             break;
 937
 938         case4(0260):
 939             ins->rex |= REX_V;
 940             ins->drexdst = regval(opx);
 941             ins->vex_cm = *codes++;
 942             ins->vex_wlp = *codes++;
 943             break;
 944
 945         case 0270:
 946             ins->rex |= REX_V;
 947             ins->drexdst = 0;
 948             ins->vex_cm = *codes++;
 949             ins->vex_wlp = *codes++;
 950             break;
 951
 952         case4(0274):
 953             length++;
 954             break;
 955
 956         case4(0300):
 957             break;
 958
 959         case 0310:
 960             if (bits == 64)
 961                 return -1;
 962             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 963             break;
 964
 965         case 0311:
 966             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 967             break;
 968
 969         case 0312:
 970             break;
 971
 972         case 0313:
 973             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 974                 has_prefix(ins, PPS_ASIZE, P_A32))
 975                 return -1;
 976             break;
 977
 978         case4(0314):
 979             break;
 980
 981         case 0320:
 982             length += (bits != 16);
 983             break;
 984
 985         case 0321:
 986             length += (bits == 16);
 987             break;
 988
 989         case 0322:
 990             break;
 991
 992         case 0323:
 993             rex_mask &= ~REX_W;
 994             break;
 995
 996         case 0324:
 997             ins->rex |= REX_W;
 998             break;
 999
1000         case 0325:
1001             ins->rex |= REX_NH;
1002             break;
1003
1004         case 0330:
1005             codes++, length++;
1006             break;
1007
1008         case 0331:
1009             break;
1010
1011         case 0332:
1012         case 0333:
1013             length++;
1014             break;
1015
1016         case 0334:
1017             ins->rex |= REX_L;
1018             break;
1019
1020         case 0335:
1021             break;
1022
1023         case 0336:
1024             if (!ins->prefixes[PPS_LREP])
1025                 ins->prefixes[PPS_LREP] = P_REP;
1026             break;
1027
1028         case 0337:
1029             if (!ins->prefixes[PPS_LREP])
1030                 ins->prefixes[PPS_LREP] = P_REPNE;
1031             break;
1032
1033         case 0340:
1034             if (ins->oprs[0].segment != NO_SEG)
1035                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1036                         " quantity of BSS space");
1037             else
1038                 length += ins->oprs[0].offset;
1039             break;
1040
1041         case 0341:
1042             if (!ins->prefixes[PPS_WAIT])
1043                 ins->prefixes[PPS_WAIT] = P_WAIT;
1044             break;
1045
1046         case4(0344):
1047             length++;
1048             break;
1049
1050         case 0360:
1051             break;
1052
1053         case 0361:
1054         case 0362:
1055         case 0363:
1056             length++;
1057             break;
1058
1059         case 0364:
1060         case 0365:
1061             break;
1062
1063         case 0366:
1064         case 0367:
1065             length++;
1066             break;
1067
1068         case 0370:
1069         case 0371:
1070         case 0372:
1071             break;
1072
1073         case 0373:
1074             length++;
1075             break;
1076
1077         case4(0100):
1078         case4(0110):
1079         case4(0120):
1080         case4(0130):
1081         case4(0200):
1082         case4(0204):
1083         case4(0210):
1084         case4(0214):
1085         case4(0220):
1086         case4(0224):
1087         case4(0230):
1088         case4(0234):
1089             {
1090                 ea ea_data;
1091                 int rfield;
1092                 opflags_t rflags;
1093                 struct operand *opy = &ins->oprs[op2];
1094
1095                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1096
1097                 if (c <= 0177) {
1098                     /* pick rfield from operand b (opx) */
1099                     rflags = regflag(opx);
1100                     rfield = nasm_regvals[opx->basereg];
1101                 } else {
1102                     rflags = 0;
1103                     rfield = c & 7;
1104                 }
1105                 if (!process_ea(opy, &ea_data, bits,
1106                                 ins->addr_size, rfield, rflags)) {
1107                     errfunc(ERR_NONFATAL, "invalid effective address");
1108                     return -1;
1109                 } else {
1110                     ins->rex |= ea_data.rex;
1111                     length += ea_data.size;
1112                 }
1113             }
1114             break;
1115
1116         default:
1117             errfunc(ERR_PANIC, "internal instruction table corrupt"
1118                     ": instruction code \\%o (0x%02X) given", c, c);
1119             break;
1120         }
1121     }
1122
1123     ins->rex &= rex_mask;
1124
1125     if (ins->rex & REX_NH) {
1126         if (ins->rex & REX_H) {
1127             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1128             return -1;
1129         }
1130         ins->rex &= ~REX_P;     /* Don't force REX prefix due to high reg */
1131     }
1132
1133     if (ins->rex & REX_V) {
1134         int bad32 = REX_R|REX_W|REX_X|REX_B;
1135
1136         if (ins->rex & REX_H) {
1137             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1138             return -1;
1139         }
1140         switch (ins->vex_wlp & 030) {
1141         case 000:
1142         case 020:
1143             ins->rex &= ~REX_W;
1144             break;
1145         case 010:
1146             ins->rex |= REX_W;
1147             bad32 &= ~REX_W;
1148             break;
1149         case 030:
1150             /* Follow REX_W */
1151             break;
1152         }
1153
1154         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1155             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1156             return -1;
1157         }
1158         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1159             length += 3;
1160         else
1161             length += 2;
1162     } else if (ins->rex & REX_D) {
1163         if (ins->rex & REX_H) {
1164             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1165             return -1;
1166         }
1167         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1168                            ins->drexdst > 7)) {
1169             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1170             return -1;
1171         }
1172         length++;
1173     } else if (ins->rex & REX_REAL) {
1174         if (ins->rex & REX_H) {
1175             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1176             return -1;
1177         } else if (bits == 64) {
1178             length++;
1179         } else if ((ins->rex & REX_L) &&
1180                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1181                    cpu >= IF_X86_64) {
1182             /* LOCK-as-REX.R */
1183             assert_no_prefix(ins, PPS_LREP);
1184             length++;
1185         } else {
1186             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1187             return -1;
1188         }
1189     }
1190
1191     return length;
1192 }
1193
/*
 * Emit a pending REX prefix byte, if any.
 *
 * Expands in a scope that provides `ins', `bits', `offset' and
 * `segment'.  When the instruction is neither DREX nor VEX (REX_D and
 * REX_V clear), has at least one REX_REAL bit set, and bits == 64, one
 * byte is written through out(), ins->rex is cleared so the prefix is
 * not emitted twice, and `offset' is advanced by one.
 *
 * Wrapped in do { } while (0) so that `EMIT_REX();' is a single
 * statement and is safe inside an unbraced if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) &&                              \
            (ins->rex & REX_REAL) && (bits == 64)) {                    \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1201
1202 static void gencode(int32_t segment, int64_t offset, int bits,
1203                     insn * ins, const struct itemplate *temp,
1204                     int64_t insn_end)
1205 {
1206     static char condval[] = {   /* conditional opcodes */
1207         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1208         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1209         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1210     };
1211     uint8_t c;
1212     uint8_t bytes[4];
1213     int64_t size;
1214     int64_t data;
1215     int op1, op2;
1216     struct operand *opx;
1217     const uint8_t *codes = temp->code;
1218     uint8_t opex = 0;
1219
1220     while (*codes) {
1221         c = *codes++;
1222         op1 = (c & 3) + ((opex & 1) << 2);
1223         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1224         opx = &ins->oprs[op1];
1225         opex = 0;               /* For the next iteration */
1226
1227         switch (c) {
1228         case 01:
1229         case 02:
1230         case 03:
1231         case 04:
1232             EMIT_REX();
1233             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1234             codes += c;
1235             offset += c;
1236             break;
1237
1238         case 05:
1239         case 06:
1240         case 07:
1241             opex = c;
1242             break;
1243
1244         case4(010):
1245             EMIT_REX();
1246             bytes[0] = *codes++ + (regval(opx) & 7);
1247             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1248             offset += 1;
1249             break;
1250
1251         case4(014):
1252             /* The test for BITS8 and SBYTE here is intended to avoid
1253                warning on optimizer actions due to SBYTE, while still
1254                warn on explicit BYTE directives.  Also warn, obviously,
1255                if the optimizer isn't enabled. */
1256             if (((opx->type & BITS8) ||
1257                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1258                 (opx->offset < -128 || opx->offset > 127)) {
1259                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1260                         "signed byte value exceeds bounds");
1261             }
1262             if (opx->segment != NO_SEG) {
1263                 data = opx->offset;
1264                 out(offset, segment, &data, OUT_ADDRESS, 1,
1265                     opx->segment, opx->wrt);
1266             } else {
1267                 bytes[0] = opx->offset;
1268                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1269                     NO_SEG);
1270             }
1271             offset += 1;
1272             break;
1273
1274         case4(020):
1275             if (opx->offset < -256 || opx->offset > 255) {
1276                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1277                         "byte value exceeds bounds");
1278             }
1279             if (opx->segment != NO_SEG) {
1280                 data = opx->offset;
1281                 out(offset, segment, &data, OUT_ADDRESS, 1,
1282                     opx->segment, opx->wrt);
1283             } else {
1284                 bytes[0] = opx->offset;
1285                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1286                     NO_SEG);
1287             }
1288             offset += 1;
1289             break;
1290
1291         case4(024):
1292             if (opx->offset < 0 || opx->offset > 255)
1293                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1294                         "unsigned byte value exceeds bounds");
1295             if (opx->segment != NO_SEG) {
1296                 data = opx->offset;
1297                 out(offset, segment, &data, OUT_ADDRESS, 1,
1298                     opx->segment, opx->wrt);
1299             } else {
1300                 bytes[0] = opx->offset;
1301                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1302                     NO_SEG);
1303             }
1304             offset += 1;
1305             break;
1306
1307         case4(030):
1308             warn_overflow_opd(opx, 2);
1309             data = opx->offset;
1310             out(offset, segment, &data, OUT_ADDRESS, 2,
1311                 opx->segment, opx->wrt);
1312             offset += 2;
1313             break;
1314
1315         case4(034):
1316             if (opx->type & (BITS16 | BITS32))
1317                 size = (opx->type & BITS16) ? 2 : 4;
1318             else
1319                 size = (bits == 16) ? 2 : 4;
1320             warn_overflow_opd(opx, size);
1321             data = opx->offset;
1322             out(offset, segment, &data, OUT_ADDRESS, size,
1323                 opx->segment, opx->wrt);
1324             offset += size;
1325             break;
1326
1327         case4(040):
1328             warn_overflow_opd(opx, 4);
1329             data = opx->offset;
1330             out(offset, segment, &data, OUT_ADDRESS, 4,
1331                 opx->segment, opx->wrt);
1332             offset += 4;
1333             break;
1334
1335         case4(044):
1336             data = opx->offset;
1337             size = ins->addr_size >> 3;
1338             warn_overflow_opd(opx, size);
1339             out(offset, segment, &data, OUT_ADDRESS, size,
1340                 opx->segment, opx->wrt);
1341             offset += size;
1342             break;
1343
1344         case4(050):
1345             if (opx->segment != segment)
1346                 errfunc(ERR_NONFATAL,
1347                         "short relative jump outside segment");
1348             data = opx->offset - insn_end;
1349             if (data > 127 || data < -128)
1350                 errfunc(ERR_NONFATAL, "short jump is out of range");
1351             bytes[0] = data;
1352             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1353             offset += 1;
1354             break;
1355
1356         case4(054):
1357             data = (int64_t)opx->offset;
1358             out(offset, segment, &data, OUT_ADDRESS, 8,
1359                 opx->segment, opx->wrt);
1360             offset += 8;
1361             break;
1362
1363         case4(060):
1364             if (opx->segment != segment) {
1365                 data = opx->offset;
1366                 out(offset, segment, &data,
1367                     OUT_REL2ADR, insn_end - offset,
1368                     opx->segment, opx->wrt);
1369             } else {
1370                 data = opx->offset - insn_end;
1371                 out(offset, segment, &data,
1372                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1373             }
1374             offset += 2;
1375             break;
1376
1377         case4(064):
1378             if (opx->type & (BITS16 | BITS32 | BITS64))
1379                 size = (opx->type & BITS16) ? 2 : 4;
1380             else
1381                 size = (bits == 16) ? 2 : 4;
1382             if (opx->segment != segment) {
1383                 data = opx->offset;
1384                 out(offset, segment, &data,
1385                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1386                     insn_end - offset, opx->segment, opx->wrt);
1387             } else {
1388                 data = opx->offset - insn_end;
1389                 out(offset, segment, &data,
1390                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1391             }
1392             offset += size;
1393             break;
1394
1395         case4(070):
1396             if (opx->segment != segment) {
1397                 data = opx->offset;
1398                 out(offset, segment, &data,
1399                     OUT_REL4ADR, insn_end - offset,
1400                     opx->segment, opx->wrt);
1401             } else {
1402                 data = opx->offset - insn_end;
1403                 out(offset, segment, &data,
1404                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1405             }
1406             offset += 4;
1407             break;
1408
1409         case4(074):
1410             if (opx->segment == NO_SEG)
1411                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1412                         " relocatable");
1413             data = 0;
1414             out(offset, segment, &data, OUT_ADDRESS, 2,
1415                 outfmt->segbase(1 + opx->segment),
1416                 opx->wrt);
1417             offset += 2;
1418             break;
1419
1420         case4(0140):
1421             data = opx->offset;
1422             warn_overflow_opd(opx, 2);
1423             if (is_sbyte16(opx)) {
1424                 bytes[0] = data;
1425                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1426                     NO_SEG);
1427                 offset++;
1428             } else {
1429                 out(offset, segment, &data, OUT_ADDRESS, 2,
1430                     opx->segment, opx->wrt);
1431                 offset += 2;
1432             }
1433             break;
1434
1435         case4(0144):
1436             EMIT_REX();
1437             bytes[0] = *codes++;
1438             if (is_sbyte16(opx))
1439                 bytes[0] |= 2;  /* s-bit */
1440             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1441             offset++;
1442             break;
1443
1444         case4(0150):
1445             data = opx->offset;
1446             warn_overflow_opd(opx, 4);
1447             if (is_sbyte32(opx)) {
1448                 bytes[0] = data;
1449                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1450                     NO_SEG);
1451                 offset++;
1452             } else {
1453                 out(offset, segment, &data, OUT_ADDRESS, 4,
1454                     opx->segment, opx->wrt);
1455                 offset += 4;
1456             }
1457             break;
1458
1459         case4(0154):
1460             EMIT_REX();
1461             bytes[0] = *codes++;
1462             if (is_sbyte32(opx))
1463                 bytes[0] |= 2;  /* s-bit */
1464             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1465             offset++;
1466             break;
1467
1468         case4(0160):
1469         case4(0164):
1470             break;
1471
1472         case 0171:
1473             bytes[0] =
1474                 (ins->drexdst << 4) |
1475                 (ins->rex & REX_OC ? 0x08 : 0) |
1476                 (ins->rex & (REX_R|REX_X|REX_B));
1477             ins->rex = 0;
1478             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1479             offset++;
1480             break;
1481
1482         case 0172:
1483             c = *codes++;
1484             opx = &ins->oprs[c >> 3];
1485             bytes[0] = nasm_regvals[opx->basereg] << 4;
1486             opx = &ins->oprs[c & 7];
1487             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1488                 errfunc(ERR_NONFATAL,
1489                         "non-absolute expression not permitted as argument %d",
1490                         c & 7);
1491             } else {
1492                 if (opx->offset & ~15) {
1493                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1494                             "four-bit argument exceeds bounds");
1495                 }
1496                 bytes[0] |= opx->offset & 15;
1497             }
1498             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1499             offset++;
1500             break;
1501
1502         case 0173:
1503             c = *codes++;
1504             opx = &ins->oprs[c >> 4];
1505             bytes[0] = nasm_regvals[opx->basereg] << 4;
1506             bytes[0] |= c & 15;
1507             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1508             offset++;
1509             break;
1510
1511         case 0174:
1512             c = *codes++;
1513             opx = &ins->oprs[c];
1514             bytes[0] = nasm_regvals[opx->basereg] << 4;
1515             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1516             offset++;
1517             break;
1518
1519         case4(0250):
1520             data = opx->offset;
1521             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1522                 (int32_t)data != (int64_t)data) {
1523                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1524                         "signed dword immediate exceeds bounds");
1525             }
1526             if (is_sbyte32(opx)) {
1527                 bytes[0] = data;
1528                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1529                     NO_SEG);
1530                 offset++;
1531             } else {
1532                 out(offset, segment, &data, OUT_ADDRESS, 4,
1533                     opx->segment, opx->wrt);
1534                 offset += 4;
1535             }
1536             break;
1537
1538         case4(0254):
1539             data = opx->offset;
1540             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1541                 (int32_t)data != (int64_t)data) {
1542                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1543                         "signed dword immediate exceeds bounds");
1544             }
1545             out(offset, segment, &data, OUT_ADDRESS, 4,
1546                 opx->segment, opx->wrt);
1547             offset += 4;
1548             break;
1549
1550         case4(0260):
1551         case 0270:
1552             codes += 2;
1553             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1554                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1555                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1556                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1557                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1558                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1559                 offset += 3;
1560             } else {
1561                 bytes[0] = 0xc5;
1562                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1563                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1564                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1565                 offset += 2;
1566             }
1567             break;
1568
1569         case4(0274):
1570         {
1571             uint64_t uv, um;
1572             int s;
1573
1574             if (ins->rex & REX_W)
1575                 s = 64;
1576             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1577                 s = 16;
1578             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1579                 s = 32;
1580             else
1581                 s = bits;
1582
1583             um = (uint64_t)2 << (s-1);
1584             uv = opx->offset;
1585
1586             if (uv > 127 && uv < (uint64_t)-128 &&
1587                 (uv < um-128 || uv > um-1)) {
1588                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1589                         "signed byte value exceeds bounds");
1590             }
1591             if (opx->segment != NO_SEG) {
1592                 data = uv;
1593                 out(offset, segment, &data, OUT_ADDRESS, 1,
1594                     opx->segment, opx->wrt);
1595             } else {
1596                 bytes[0] = uv;
1597                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1598                     NO_SEG);
1599             }
1600             offset += 1;
1601             break;
1602         }
1603
1604         case4(0300):
1605             break;
1606
1607         case 0310:
1608             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1609                 *bytes = 0x67;
1610                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1611                 offset += 1;
1612             } else
1613                 offset += 0;
1614             break;
1615
1616         case 0311:
1617             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1618                 *bytes = 0x67;
1619                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1620                 offset += 1;
1621             } else
1622                 offset += 0;
1623             break;
1624
1625         case 0312:
1626             break;
1627
1628         case 0313:
1629             ins->rex = 0;
1630             break;
1631
1632         case4(0314):
1633             break;
1634
1635         case 0320:
1636             if (bits != 16) {
1637                 *bytes = 0x66;
1638                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1639                 offset += 1;
1640             } else
1641                 offset += 0;
1642             break;
1643
1644         case 0321:
1645             if (bits == 16) {
1646                 *bytes = 0x66;
1647                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1648                 offset += 1;
1649             } else
1650                 offset += 0;
1651             break;
1652
1653         case 0322:
1654         case 0323:
1655             break;
1656
1657         case 0324:
1658             ins->rex |= REX_W;
1659             break;
1660
1661         case 0325:
1662             break;
1663
1664         case 0330:
1665             *bytes = *codes++ ^ condval[ins->condition];
1666             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1667             offset += 1;
1668             break;
1669
1670         case 0331:
1671             break;
1672
1673         case 0332:
1674         case 0333:
1675             *bytes = c - 0332 + 0xF2;
1676             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1677             offset += 1;
1678             break;
1679
1680         case 0334:
1681             if (ins->rex & REX_R) {
1682                 *bytes = 0xF0;
1683                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1684                 offset += 1;
1685             }
1686             ins->rex &= ~(REX_L|REX_R);
1687             break;
1688
1689         case 0335:
1690             break;
1691
1692         case 0336:
1693         case 0337:
1694             break;
1695
1696         case 0340:
1697             if (ins->oprs[0].segment != NO_SEG)
1698                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1699             else {
1700                 int64_t size = ins->oprs[0].offset;
1701                 if (size > 0)
1702                     out(offset, segment, NULL,
1703                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1704                 offset += size;
1705             }
1706             break;
1707
1708         case 0341:
1709             break;
1710
1711         case 0344:
1712         case 0345:
1713             bytes[0] = c & 1;
1714             switch (ins->oprs[0].basereg) {
1715             case R_CS:
1716                 bytes[0] += 0x0E;
1717                 break;
1718             case R_DS:
1719                 bytes[0] += 0x1E;
1720                 break;
1721             case R_ES:
1722                 bytes[0] += 0x06;
1723                 break;
1724             case R_SS:
1725                 bytes[0] += 0x16;
1726                 break;
1727             default:
1728                 errfunc(ERR_PANIC,
1729                         "bizarre 8086 segment register received");
1730             }
1731             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1732             offset++;
1733             break;
1734
1735         case 0346:
1736         case 0347:
1737             bytes[0] = c & 1;
1738             switch (ins->oprs[0].basereg) {
1739             case R_FS:
1740                 bytes[0] += 0xA0;
1741                 break;
1742             case R_GS:
1743                 bytes[0] += 0xA8;
1744                 break;
1745             default:
1746                 errfunc(ERR_PANIC,
1747                         "bizarre 386 segment register received");
1748             }
1749             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1750             offset++;
1751             break;
1752
1753         case 0360:
1754             break;
1755
1756         case 0361:
1757             bytes[0] = 0x66;
1758             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1759             offset += 1;
1760             break;
1761
1762         case 0362:
1763         case 0363:
1764             bytes[0] = c - 0362 + 0xf2;
1765             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1766             offset += 1;
1767             break;
1768
1769         case 0364:
1770         case 0365:
1771             break;
1772
1773         case 0366:
1774         case 0367:
1775             *bytes = c - 0366 + 0x66;
1776             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1777             offset += 1;
1778             break;
1779
1780         case 0370:
1781         case 0371:
1782         case 0372:
1783             break;
1784
1785         case 0373:
1786             *bytes = bits == 16 ? 3 : 5;
1787             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1788             offset += 1;
1789             break;
1790
1791         case4(0100):
1792         case4(0110):
1793         case4(0120):
1794         case4(0130):
1795         case4(0200):
1796         case4(0204):
1797         case4(0210):
1798         case4(0214):
1799         case4(0220):
1800         case4(0224):
1801         case4(0230):
1802         case4(0234):
1803             {
1804                 ea ea_data;
1805                 int rfield;
1806                 opflags_t rflags;
1807                 uint8_t *p;
1808                 int32_t s;
1809                 enum out_type type;
1810                 struct operand *opy = &ins->oprs[op2];
1811
1812                 if (c <= 0177) {
1813                     /* pick rfield from operand b (opx) */
1814                     rflags = regflag(opx);
1815                     rfield = nasm_regvals[opx->basereg];
1816                 } else {
1817                     /* rfield is constant */
1818                     rflags = 0;
1819                     rfield = c & 7;
1820                 }
1821
1822                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1823                                 rfield, rflags)) {
1824                     errfunc(ERR_NONFATAL, "invalid effective address");
1825                 }
1826
1827
1828                 p = bytes;
1829                 *p++ = ea_data.modrm;
1830                 if (ea_data.sib_present)
1831                     *p++ = ea_data.sib;
1832
1833                 /* DREX suffixes come between the SIB and the displacement */
1834                 if (ins->rex & REX_D) {
1835                     *p++ = (ins->drexdst << 4) |
1836                            (ins->rex & REX_OC ? 0x08 : 0) |
1837                            (ins->rex & (REX_R|REX_X|REX_B));
1838                     ins->rex = 0;
1839                 }
1840
1841                 s = p - bytes;
1842                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1843
1844                 /*
1845                  * Make sure the address gets the right offset in case
1846                  * the line breaks in the .lst file (BR 1197827)
1847                  */
1848                 offset += s;
1849                 s = 0;
1850
1851                 switch (ea_data.bytes) {
1852                 case 0:
1853                     break;
1854                 case 1:
1855                 case 2:
1856                 case 4:
1857                 case 8:
1858                     data = opy->offset;
1859                     warn_overflow_opd(opy, ea_data.bytes);
1860                     s += ea_data.bytes;
1861                     if (ea_data.rip) {
1862                         if (opy->segment == segment) {
1863                             data -= insn_end;
1864                             out(offset, segment, &data, OUT_ADDRESS,
1865                                 ea_data.bytes, NO_SEG, NO_SEG);
1866                         } else {
1867                             out(offset, segment, &data, OUT_REL4ADR,
1868                                 insn_end - offset, opy->segment, opy->wrt);
1869                         }
1870                     } else {
1871                         type = OUT_ADDRESS;
1872                         out(offset, segment, &data, OUT_ADDRESS,
1873                             ea_data.bytes, opy->segment, opy->wrt);
1874                     }
1875                     break;
1876                 default:
1877                     /* Impossible! */
1878                     errfunc(ERR_PANIC,
1879                             "Invalid amount of bytes (%d) for offset?!",
1880                             ea_data.bytes);
1881                     break;
1882                 }
1883                 offset += s;
1884             }
1885             break;
1886
1887         default:
1888             errfunc(ERR_PANIC, "internal instruction table corrupt"
1889                     ": instruction code \\%o (0x%02X) given", c, c);
1890             break;
1891         }
1892     }
1893 }
1894
1895 static opflags_t regflag(const operand * o)
1896 {
1897     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1898         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1899     }
1900     return nasm_reg_flags[o->basereg];
1901 }
1902
1903 static int32_t regval(const operand * o)
1904 {
1905     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1906         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1907     }
1908     return nasm_regvals[o->basereg];
1909 }
1910
1911 static int op_rexflags(const operand * o, int mask)
1912 {
1913     opflags_t flags;
1914     int val;
1915
1916     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1917         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1918     }
1919
1920     flags = nasm_reg_flags[o->basereg];
1921     val = nasm_regvals[o->basereg];
1922
1923     return rexflags(val, flags, mask);
1924 }
1925
1926 static int rexflags(int val, opflags_t flags, int mask)
1927 {
1928     int rex = 0;
1929
1930     if (val >= 8)
1931         rex |= REX_B|REX_X|REX_R;
1932     if (flags & BITS64)
1933         rex |= REX_W;
1934     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1935         rex |= REX_H;
1936     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1937         rex |= REX_P;
1938
1939     return rex & mask;
1940 }
1941
1942 static enum match_result find_match(const struct itemplate **tempp,
1943                                     insn *instruction,
1944                                     int32_t segment, int64_t offset, int bits)
1945 {
1946     const struct itemplate *temp;
1947     enum match_result m, merr;
1948     opflags_t xsizeflags[MAX_OPERANDS];
1949     bool opsizemissing = false;
1950     int i;
1951
1952     for (i = 0; i < instruction->operands; i++)
1953         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1954
1955     merr = MERR_INVALOP;
1956
1957     for (temp = nasm_instructions[instruction->opcode];
1958          temp->opcode != I_none; temp++) {
1959         m = matches(temp, instruction, bits);
1960         if (m == MOK_JUMP) {
1961             if (jmp_match(segment, offset, bits, instruction, temp->code))
1962                 m = MOK_GOOD;
1963             else
1964                 m = MERR_INVALOP;
1965         } else if (m == MERR_OPSIZEMISSING &&
1966                    (temp->flags & IF_SMASK) != IF_SX) {
1967             /*
1968              * Missing operand size and a candidate for fuzzy matching...
1969              */
1970             for (i = 0; i < temp->operands; i++) {
1971                 if ((temp->opd[i] & SAME_AS) == 0)
1972                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1973             }
1974             opsizemissing = true;
1975         }
1976         if (m > merr)
1977             merr = m;
1978         if (merr == MOK_GOOD)
1979             goto done;
1980     }
1981
1982     /* No match, but see if we can get a fuzzy operand size match... */
1983     if (!opsizemissing)
1984         goto done;
1985
1986     for (i = 0; i < instruction->operands; i++) {
1987         /*
1988          * We ignore extrinsic operand sizes on registers, so we should
1989          * never try to fuzzy-match on them.  This also resolves the case
1990          * when we have e.g. "xmmrm128" in two different positions.
1991          */
1992         if (is_class(REGISTER, instruction->oprs[i].type))
1993             continue;
1994
1995         /* This tests if xsizeflags[i] has more than one bit set */
1996         if ((xsizeflags[i] & (xsizeflags[i]-1)))
1997             goto done;          /* No luck */
1998
1999         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2000     }
2001
2002     /* Try matching again... */
2003     for (temp = nasm_instructions[instruction->opcode];
2004          temp->opcode != I_none; temp++) {
2005         m = matches(temp, instruction, bits);
2006         if (m == MOK_JUMP) {
2007             if (jmp_match(segment, offset, bits, instruction, temp->code))
2008                 m = MOK_GOOD;
2009             else
2010                 m = MERR_INVALOP;
2011         }
2012         if (m > merr)
2013             merr = m;
2014         if (merr == MOK_GOOD)
2015             goto done;
2016     }
2017
2018 done:
2019     *tempp = temp;
2020     return merr;
2021 }
2022
2023 static enum match_result matches(const struct itemplate *itemp,
2024                                  insn *instruction, int bits)
2025 {
2026     int i, size[MAX_OPERANDS], asize, oprs;
2027     bool opsizemissing = false;
2028
2029     /*
2030      * Check the opcode
2031      */
2032     if (itemp->opcode != instruction->opcode)
2033         return MERR_INVALOP;
2034
2035     /*
2036      * Count the operands
2037      */
2038     if (itemp->operands != instruction->operands)
2039         return MERR_INVALOP;
2040
2041     /*
2042      * Check that no spurious colons or TOs are present
2043      */
2044     for (i = 0; i < itemp->operands; i++)
2045         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2046             return MERR_INVALOP;
2047
2048     /*
2049      * Process size flags
2050      */
2051     switch (itemp->flags & IF_SMASK) {
2052     case IF_SB:
2053         asize = BITS8;
2054         break;
2055     case IF_SW:
2056         asize = BITS16;
2057         break;
2058     case IF_SD:
2059         asize = BITS32;
2060         break;
2061     case IF_SQ:
2062         asize = BITS64;
2063         break;
2064     case IF_SO:
2065         asize = BITS128;
2066         break;
2067     case IF_SY:
2068         asize = BITS256;
2069         break;
2070     case IF_SZ:
2071         switch (bits) {
2072         case 16:
2073             asize = BITS16;
2074             break;
2075         case 32:
2076             asize = BITS32;
2077             break;
2078         case 64:
2079             asize = BITS64;
2080             break;
2081         default:
2082             asize = 0;
2083             break;
2084         }
2085         break;
2086     default:
2087         asize = 0;
2088         break;
2089     }
2090
2091     if (itemp->flags & IF_ARMASK) {
2092         /* S- flags only apply to a specific operand */
2093         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2094         memset(size, 0, sizeof size);
2095         size[i] = asize;
2096     } else {
2097         /* S- flags apply to all operands */
2098         for (i = 0; i < MAX_OPERANDS; i++)
2099             size[i] = asize;
2100     }
2101
2102     /*
2103      * Check that the operand flags all match up,
2104      * it's a bit tricky so lets be verbose:
2105      *
2106      * 1) Find out the size of operand. If instruction
2107      *    doesn't have one specified -- we're trying to
2108      *    guess it either from template (IF_S* flag) or
2109      *    from code bits.
2110      *
2111      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2112      *    (ie the same operand as was specified somewhere in template, and
2113      *    this referred operand index is being achieved via ~SAME_AS)
2114      *    we are to be sure that both registers (in template and instruction)
2115      *    do exactly match.
2116      *
2117      * 3) If template operand do not match the instruction OR
2118      *    template has an operand size specified AND this size differ
2119      *    from which instruction has (perhaps we got it from code bits)
2120      *    we are:
2121      *      a)  Check that only size of instruction and operand is differ
2122      *          other characteristics do match
2123      *      b)  Perhaps it's a register specified in instruction so
2124      *          for such a case we just mark that operand as "size
2125      *          missing" and this will turn on fuzzy operand size
2126      *          logic facility (handled by a caller)
2127      */
2128     for (i = 0; i < itemp->operands; i++) {
2129         opflags_t type = instruction->oprs[i].type;
2130         if (!(type & SIZE_MASK))
2131             type |= size[i];
2132
2133         if (itemp->opd[i] & SAME_AS) {
2134             int j = itemp->opd[i] & ~SAME_AS;
2135             if (type != instruction->oprs[j].type ||
2136                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2137                 return MERR_INVALOP;
2138         } else if (itemp->opd[i] & ~type ||
2139             ((itemp->opd[i] & SIZE_MASK) &&
2140              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2141             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2142                 return MERR_INVALOP;
2143             } else if (!is_class(REGISTER, type)) {
2144                 /*
2145                  * Note: we don't honor extrinsic operand sizes for registers,
2146                  * so "missing operand size" for a register should be
2147                  * considered a wildcard match rather than an error.
2148                  */
2149                 opsizemissing = true;
2150             }
2151         }
2152     }
2153
2154     if (opsizemissing)
2155         return MERR_OPSIZEMISSING;
2156
2157     /*
2158      * Check operand sizes
2159      */
2160     if (itemp->flags & (IF_SM | IF_SM2)) {
2161         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2162         for (i = 0; i < oprs; i++) {
2163             asize = itemp->opd[i] & SIZE_MASK;
2164             if (asize) {
2165                 for (i = 0; i < oprs; i++)
2166                     size[i] = asize;
2167                 break;
2168             }
2169         }
2170     } else {
2171         oprs = itemp->operands;
2172     }
2173
2174     for (i = 0; i < itemp->operands; i++) {
2175         if (!(itemp->opd[i] & SIZE_MASK) &&
2176             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2177             return MERR_OPSIZEMISMATCH;
2178     }
2179
2180     /*
2181      * Check template is okay at the set cpu level
2182      */
2183     if (((itemp->flags & IF_PLEVEL) > cpu))
2184         return MERR_BADCPU;
2185
2186     /*
2187      * Verify the appropriate long mode flag.
2188      */
2189     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2190         return MERR_BADMODE;
2191
2192     /*
2193      * Check if special handling needed for Jumps
2194      */
2195     if ((itemp->code[0] & 0374) == 0370)
2196         return MOK_JUMP;
2197
2198     return MOK_GOOD;
2199 }
2200
/*
 * Build the ModRM (and, where needed, SIB) encoding of an
 * effective-address operand.
 *
 *   input     operand to encode: either a register or a memory reference
 *   output    receives modrm, sib_present/sib, bytes (size of the
 *             displacement in bytes), rip and size.  REX bits are OR-ed
 *             into output->rex; this function never clears that field,
 *             so the caller is responsible for its initial value.
 *   bits      current mode; only compared against 64 here
 *   addrbits  address size; compared against 16, 32 and 64 here
 *   rfield    value for the ModRM reg field (only the low 3 bits are
 *             placed in modrm; the whole value is passed to rexflags())
 *   rflags    register flags belonging to rfield, used for its REX bits
 *
 * Returns output on success and NULL when the operand cannot be encoded.
 * On a NULL return output may already have been partially modified
 * (rip and rex are written before any validation takes place).
 */
static ea *process_ea(operand * input, ea * output, int bits,
                      int addrbits, int rfield, opflags_t rflags)
{
    /* A forward reference: the operand's value is not yet known */
    bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);

    output->rip = false;

    /* REX flags for the rfield operand */
    output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);

    if (is_class(REGISTER, input->type)) {  /* register direct */
        int i;
        opflags_t f;

        if (input->basereg < EXPR_REG_START /* Verify as Register */
            || input->basereg >= REG_ENUM_LIMIT)
            return NULL;
        f = regflag(input);
        i = nasm_regvals[input->basereg];

        if (REG_EA & ~f)
            return NULL;        /* Invalid EA register */

        output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);

        output->sib_present = false;             /* no SIB necessary */
        output->bytes = 0;  /* no offset necessary either */
        /* mod = 3 (0xC0): the r/m field names a register */
        output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
    } else {                    /* it's a memory reference */
        if (input->basereg == -1
            && (input->indexreg == -1 || input->scale == 0)) {
            /* it's a pure offset */
            if (bits == 64 && (~input->type & IP_REL)) {
              /*
               * 64-bit mode without IP_REL: encode through a SIB byte
               * (rm = 4) with index = 4 and base = 5 and a 4-byte
               * displacement, so that the result is not RIP-relative.
               */
              int scale, index, base;
              output->sib_present = true;
              scale = 0;
              index = 4;
              base = 5;
              output->sib = (scale << 6) | (index << 3) | base;
              output->bytes = 4;
              output->modrm = 4 | ((rfield & 7) << 3);
              output->rip = false;
            } else {
              /*
               * Plain displacement-only form: rm = 6 with a 2-byte
               * offset for 16-bit addressing, rm = 5 with a 4-byte
               * offset otherwise.  In 64-bit mode this form is flagged
               * as RIP-relative.
               */
              output->sib_present = false;
              output->bytes = (addrbits != 16 ? 4 : 2);
              output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
              output->rip = bits == 64;
            }
        } else {                /* it's an indirection */
            int i = input->indexreg, b = input->basereg, s = input->scale;
            /* NOTE(review): the offset is narrowed to 32 bits here */
            int32_t o = input->offset, seg = input->segment;
            int hb = input->hintbase, ht = input->hinttype;
            int t, it, bt;              /* register numbers */
            opflags_t x, ix, bx;        /* register flags */

            if (s == 0)
                i = -1;         /* make this easy, at least */

            /* Index register: value and flags, or -1/0 if there is none */
            if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
                it = nasm_regvals[i];
                ix = nasm_reg_flags[i];
            } else {
                it = -1;
                ix = 0;
            }

            /* Base register: value and flags, or -1/0 if there is none */
            if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
                bt = nasm_regvals[b];
                bx = nasm_reg_flags[b];
            } else {
                bt = -1;
                bx = 0;
            }

            /* check for a 32/64-bit memory reference... */
            if ((ix|bx) & (BITS32|BITS64)) {
                /* it must be a 32/64-bit memory reference. Firstly we have
                 * to check that all registers involved are type E/Rxx. */
                /* sok tracks which address sizes remain acceptable */
                int32_t sok = BITS32|BITS64;

                if (it != -1) {
                    if (!(REG64 & ~ix) || !(REG32 & ~ix))
                        sok &= ix;
                    else
                        return NULL;
                }

                if (bt != -1) {
                    if (REG_GPR & ~bx)
                        return NULL; /* Invalid register */
                    if (~sok & bx & SIZE_MASK)
                        return NULL; /* Invalid size */
                    sok &= bx;
                }

                /* While we're here, ensure the user didn't specify
                   WORD or QWORD. */
                if (input->disp_size == 16 || input->disp_size == 64)
                    return NULL;

                /* The registers used must suit the address size */
                if (addrbits == 16 ||
                    (addrbits == 32 && !(sok & BITS32)) ||
                    (addrbits == 64 && !(sok & BITS64)))
                    return NULL;

                /*
                 * now reorganize base/index
                 *
                 * The steps below are applied in sequence and each one
                 * sees the result of the previous ones, so their order
                 * matters.
                 */
                if (s == 1 && bt != it && bt != -1 && it != -1 &&
                    ((hb == b && ht == EAH_NOTBASE)
                     || (hb == i && ht == EAH_MAKEBASE))) {
                    /* swap if hints say so */
                    t = bt, bt = it, it = t;
                    x = bx, bx = ix, ix = x;
                }
                if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
                    bt = -1, bx = 0, s++;
                if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
                    /* make single reg base, unless hint */
                    bt = it, bx = ix, it = -1, ix = 0;
                }
                if (((s == 2 && it != REG_NUM_ESP
                      && !(input->eaflags & EAF_TIMESTWO)) || s == 3
                     || s == 5 || s == 9) && bt == -1)
                    bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
                /*
                 * EAF_TIMESTWO with a lone base register other than ESP:
                 * move it into the index slot with scale 1 and no base,
                 * i.e. convert [NOSPLIT EAX] to sib format with 0x0
                 * displacement.
                 */
                if (it == -1 && (bt & 7) != REG_NUM_ESP
                    && (input->eaflags & EAF_TIMESTWO))
                    it = bt, ix = bx, bt = -1, bx = 0, s = 1;
                if (s == 1 && it == REG_NUM_ESP) {
                    /* swap ESP into base if scale is 1 */
                    t = it, it = bt, bt = t;
                    x = ix, ix = bx, bx = x;
                }
                /* ESP as index, or an index with an unencodable scale */
                if (it == REG_NUM_ESP
                    || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
                    return NULL;        /* wrong, for various reasons */

                output->rex |= rexflags(it, ix, REX_X);
                output->rex |= rexflags(bt, bx, REX_B);

                if (it == -1 && (bt & 7) != REG_NUM_ESP) {
                    /* no SIB needed */
                    int mod, rm;

                    if (bt == -1) {
                        /* no base at all: rm = 5 with a 4-byte offset */
                        rm = 5;
                        mod = 0;
                    } else {
                        rm = (bt & 7);
                        /*
                         * mod 0: no displacement (not available when
                         *        rm is REG_NUM_EBP);
                         * mod 1: one displacement byte;
                         * mod 2: four displacement bytes.
                         */
                        if (rm != REG_NUM_EBP && o == 0 &&
                                seg == NO_SEG && !forw_ref &&
                                !(input->eaflags &
                                  (EAF_BYTEOFFS | EAF_WORDOFFS)))
                            mod = 0;
                        else if (input->eaflags & EAF_BYTEOFFS ||
                                 (o >= -128 && o <= 127 && seg == NO_SEG
                                  && !forw_ref
                                  && !(input->eaflags & EAF_WORDOFFS)))
                            mod = 1;
                        else
                            mod = 2;
                    }

                    output->sib_present = false;
                    output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
                    output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
                } else {
                    /* we need a SIB */
                    int mod, scale, index, base;

                    /* index value 4 is used when there is no index */
                    if (it == -1)
                        index = 4, s = 1;
                    else
                        index = (it & 7);

                    /* translate the scale factor into the 2-bit SIB field */
                    switch (s) {
                    case 1:
                        scale = 0;
                        break;
                    case 2:
                        scale = 1;
                        break;
                    case 4:
                        scale = 2;
                        break;
                    case 8:
                        scale = 3;
                        break;
                    default:   /* then what the smeg is it? */
                        return NULL;    /* panic */
                    }

                    if (bt == -1) {
                        /* no base: base = 5, mod = 0, 4-byte offset */
                        base = 5;
                        mod = 0;
                    } else {
                        base = (bt & 7);
                        /* same mod selection as in the no-SIB case */
                        if (base != REG_NUM_EBP && o == 0 &&
                                    seg == NO_SEG && !forw_ref &&
                                    !(input->eaflags &
                                      (EAF_BYTEOFFS | EAF_WORDOFFS)))
                            mod = 0;
                        else if (input->eaflags & EAF_BYTEOFFS ||
                                 (o >= -128 && o <= 127 && seg == NO_SEG
                                  && !forw_ref
                                  && !(input->eaflags & EAF_WORDOFFS)))
                            mod = 1;
                        else
                            mod = 2;
                    }

                    output->sib_present = true;
                    output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
                    /* rm = 4 announces the SIB byte */
                    output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
                    output->sib = (scale << 6) | (index << 3) | base;
                }
            } else {            /* it's 16-bit */
                int mod, rm;

                /* check for 64-bit long mode */
                if (addrbits == 64)
                    return NULL;

                /* check all registers are BX, BP, SI or DI */
                if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
                     && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
                                       && i != R_SI && i != R_DI))
                    return NULL;

                /* ensure the user didn't specify DWORD/QWORD */
                if (input->disp_size == 32 || input->disp_size == 64)
                    return NULL;

                if (s != 1 && i != -1)
                    return NULL;        /* no can do, in 16-bit EA */
                /* a lone index register is treated as the base */
                if (b == -1 && i != -1) {
                    int tmp = b;
                    b = i;
                    i = tmp;
                }               /* swap */
                /* with two registers, SI/DI belongs in the index slot */
                if ((b == R_SI || b == R_DI) && i != -1) {
                    int tmp = b;
                    b = i;
                    i = tmp;
                }
                /* have BX/BP as base, SI/DI index */
                if (b == i)
                    return NULL;        /* shouldn't ever happen, in theory */
                if (i != -1 && b != -1 &&
                    (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
                    return NULL;        /* invalid combinations */
                if (b == -1)    /* pure offset: handled above */
                    return NULL;        /* so if it gets to here, panic! */

                /* map the register combination onto the 16-bit r/m value */
                rm = -1;
                if (i != -1)
                    switch (i * 256 + b) {
                    case R_SI * 256 + R_BX:
                        rm = 0;
                        break;
                    case R_DI * 256 + R_BX:
                        rm = 1;
                        break;
                    case R_SI * 256 + R_BP:
                        rm = 2;
                        break;
                    case R_DI * 256 + R_BP:
                        rm = 3;
                        break;
                } else
                    switch (b) {
                    case R_SI:
                        rm = 4;
                        break;
                    case R_DI:
                        rm = 5;
                        break;
                    case R_BP:
                        rm = 6;
                        break;
                    case R_BX:
                        rm = 7;
                        break;
                    }
                if (rm == -1)   /* can't happen, in theory */
                    return NULL;        /* so panic if it does */

                /*
                 * mod 0: no displacement (not available for rm = 6);
                 * mod 1: one displacement byte;
                 * mod 2: two displacement bytes.
                 */
                if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
                    !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
                    mod = 0;
                else if (input->eaflags & EAF_BYTEOFFS ||
                         (o >= -128 && o <= 127 && seg == NO_SEG
                          && !forw_ref
                          && !(input->eaflags & EAF_WORDOFFS)))
                    mod = 1;
                else
                    mod = 2;

                output->sib_present = false;    /* no SIB - it's 16-bit */
                output->bytes = mod;    /* bytes of offset needed */
                output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
            }
        }
    }

    /* total encoded length: ModRM byte + optional SIB + displacement */
    output->size = 1 + output->sib_present + output->bytes;
    return output;
}
2508
2509 static void add_asp(insn *ins, int addrbits)
2510 {
2511     int j, valid;
2512     int defdisp;
2513
2514     valid = (addrbits == 64) ? 64|32 : 32|16;
2515
2516     switch (ins->prefixes[PPS_ASIZE]) {
2517     case P_A16:
2518         valid &= 16;
2519         break;
2520     case P_A32:
2521         valid &= 32;
2522         break;
2523     case P_A64:
2524         valid &= 64;
2525         break;
2526     case P_ASP:
2527         valid &= (addrbits == 32) ? 16 : 32;
2528         break;
2529     default:
2530         break;
2531     }
2532
2533     for (j = 0; j < ins->operands; j++) {
2534         if (is_class(MEMORY, ins->oprs[j].type)) {
2535             opflags_t i, b;
2536
2537             /* Verify as Register */
2538             if (ins->oprs[j].indexreg < EXPR_REG_START
2539                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2540                 i = 0;
2541             else
2542                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2543
2544             /* Verify as Register */
2545             if (ins->oprs[j].basereg < EXPR_REG_START
2546                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2547                 b = 0;
2548             else
2549                 b = nasm_reg_flags[ins->oprs[j].basereg];
2550
2551             if (ins->oprs[j].scale == 0)
2552                 i = 0;
2553
2554             if (!i && !b) {
2555                 int ds = ins->oprs[j].disp_size;
2556                 if ((addrbits != 64 && ds > 8) ||
2557                     (addrbits == 64 && ds == 16))
2558                     valid &= ds;
2559             } else {
2560                 if (!(REG16 & ~b))
2561                     valid &= 16;
2562                 if (!(REG32 & ~b))
2563                     valid &= 32;
2564                 if (!(REG64 & ~b))
2565                     valid &= 64;
2566
2567                 if (!(REG16 & ~i))
2568                     valid &= 16;
2569                 if (!(REG32 & ~i))
2570                     valid &= 32;
2571                 if (!(REG64 & ~i))
2572                     valid &= 64;
2573             }
2574         }
2575     }
2576
2577     if (valid & addrbits) {
2578         ins->addr_size = addrbits;
2579     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2580         /* Add an address size prefix */
2581         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2582         ins->prefixes[PPS_ASIZE] = pref;
2583         ins->addr_size = (addrbits == 32) ? 16 : 32;
2584     } else {
2585         /* Impossible... */
2586         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2587         ins->addr_size = addrbits; /* Error recovery */
2588     }
2589
2590     defdisp = ins->addr_size == 16 ? 16 : 32;
2591
2592     for (j = 0; j < ins->operands; j++) {
2593         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2594             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2595             != ins->addr_size) {
2596             /* mem_offs sizes must match the address size; if not,
2597                strip the MEM_OFFS bit and match only EA instructions */
2598             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2599         }
2600     }
2601 }