assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 0ww lpp
  96  *                 [w0] ww = 0 for W = 0
  97  *                 [w1] ww = 1 for W = 1
  98  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  99  *                 [ww] ww = 3 for W used as REX.W
 100  *
 101  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 102  *
 103  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 104  *                 which is to be extended to the operand size.
 105  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 106  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 107  * \312          - (disassembler only) invalid with non-default address size.
 108  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 109  * \314          - (disassembler only) invalid with REX.B
 110  * \315          - (disassembler only) invalid with REX.X
 111  * \316          - (disassembler only) invalid with REX.R
 112  * \317          - (disassembler only) invalid with REX.W
 113  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 114  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 115  * \322          - indicates that this instruction is only valid when the
 116  *                 operand size is the default (instruction to disassembler,
 117  *                 generates no code in the assembler)
 118  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 119  * \324          - indicates 64-bit operand size requiring REX prefix.
 120  * \325          - instruction which always uses spl/bpl/sil/dil
 121  * \330          - a literal byte follows in the code stream, to be added
 122  *                 to the condition code value of the instruction.
 123  * \331          - instruction not valid with REP prefix.  Hint for
 124  *                 disassembler only; for SSE instructions.
 125  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 126  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 127  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 128  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
 129  * \336          - force a REP(E) prefix (0xF3) even if not specified.
 130  * \337          - force a REPNE prefix (0xF2) even if not specified.
 131  *                 \336-\337 are still listed as prefixes in the disassembler.
 132  * \340          - reserve <operand 0> bytes of uninitialized storage.
 133  *                 Operand 0 had better be a segmentless constant.
 134  * \341          - this instruction needs a WAIT "prefix"
 135  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 136  *                 (POP is never used for CS) depending on operand 0
 137  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 138  *                 on operand 0
 139  * \360          - no SSE prefix (== \364\331)
 140  * \361          - 66 SSE prefix (== \366\331)
 141  * \362          - F2 SSE prefix (== \364\332)
 142  * \363          - F3 SSE prefix (== \364\333)
 143  * \364          - operand-size prefix (0x66) not permitted
 144  * \365          - address-size prefix (0x67) not permitted
 145  * \366          - operand-size prefix (0x66) used as opcode extension
 146  * \367          - address-size prefix (0x67) used as opcode extension
 147  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 148  *                 370 is used for Jcc, 371 is used for JMP.
 149  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 150  *                 used for conditional jump over longer jump
 151  */
 152
 153 #include "compiler.h"
 154
 155 #include <stdio.h>
 156 #include <string.h>
 157 #include <inttypes.h>
 158
 159 #include "nasm.h"
 160 #include "nasmlib.h"
 161 #include "assemble.h"
 162 #include "insns.h"
 163 #include "tables.h"
 164
/*
 * Result of matching an instruction against an instruction template
 * (the return type of matches() and find_match()).  The order of the
 * enumerators is meaningful: the MERR_* errors sort below the MOK_*
 * success values, and the errors themselves run from least to most
 * specific.
 */
enum match_result {
    /*
     * Matching errors.  These should be sorted so that more specific
     * errors come later in the sequence.
     */
    MERR_INVALOP,               /* no special message; assemble() reports the generic error */
    MERR_OPSIZEMISSING,         /* assemble(): "operation size not specified" */
    MERR_OPSIZEMISMATCH,        /* assemble(): "mismatch in operand sizes" */
    MERR_BADCPU,                /* assemble(): "no instruction for this cpu level" */
    MERR_BADMODE,               /* assemble(): not supported in this bit mode */
    /*
     * Matching success; the conditional ones first
     */
    MOK_JUMP,                   /* Matching OK but needs jmp_match() */
    MOK_GOOD                    /* Matching unconditionally OK */
};
 181
/*
 * Description of an encoded effective address.  process_ea() takes and
 * returns a pointer to this type.
 *
 * NOTE(review): `rex' and `rip' share the "bytes themselves" comment
 * with modrm/sib, but they look like flag fields rather than emitted
 * bytes - confirm against process_ea().
 */
typedef struct {
    int sib_present;                 /* is a SIB byte necessary? */
    int bytes;                       /* # of bytes of offset needed */
    int size;                        /* lazy - this is sib+bytes+1 */
    uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
} ea;
 188
/*
 * File-scope state.  assemble() stores all four of these on entry and
 * insn_size() stores `cpu' and `errfunc', so that the static helpers
 * in this file can use them without extra parameters.
 */
static uint32_t cpu;            /* cpu level received from nasm.c */
static efunc errfunc;           /* error reporting callback supplied by the caller */
static struct ofmt *outfmt;     /* output format driver; set by assemble() only */
static ListGen *list;           /* listing generator; set by assemble() only */

/*
 * Forward declarations of static helpers that are used before the
 * point in this file where they are defined.
 */
static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
static void gencode(int32_t segment, int64_t offset, int bits,
                    insn * ins, const struct itemplate *temp,
                    int64_t insn_end);
static enum match_result find_match(const struct itemplate **tempp,
                                    insn *instruction,
                                    int32_t segment, int64_t offset, int bits);
static enum match_result matches(const struct itemplate *, insn *, int bits);
static opflags_t regflag(const operand *);
static int32_t regval(const operand *);
static int rexflags(int, opflags_t, int);
static int op_rexflags(const operand *, int);
static ea *process_ea(operand *, ea *, int, int, int, opflags_t);
static void add_asp(insn *, int);
 206 static ea *process_ea(operand *, ea *, int, int, int, opflags_t);
 207 static void add_asp(insn *, int);
 208
 209 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 210 {
 211     return ins->prefixes[pos] == prefix;
 212 }
 213
 214 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 215 {
 216     if (ins->prefixes[pos])
 217         errfunc(ERR_NONFATAL, "invalid %s prefix",
 218                 prefix_name(ins->prefixes[pos]));
 219 }
 220
 221 static const char *size_name(int size)
 222 {
 223     switch (size) {
 224     case 1:
 225         return "byte";
 226     case 2:
 227         return "word";
 228     case 4:
 229         return "dword";
 230     case 8:
 231         return "qword";
 232     case 10:
 233         return "tword";
 234     case 16:
 235         return "oword";
 236     case 32:
 237         return "yword";
 238     default:
 239         return "???";
 240     }
 241 }
 242
 243 static void warn_overflow(int pass, int size)
 244 {
 245     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 246             "%s data exceeds bounds", size_name(size));
 247 }
 248
 249 static void warn_overflow_const(int64_t data, int size)
 250 {
 251     if (overflow_general(data, size))
 252         warn_overflow(ERR_PASS1, size);
 253 }
 254
 255 static void warn_overflow_opd(const struct operand *o, int size)
 256 {
 257     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 258         if (overflow_general(o->offset, size))
 259             warn_overflow(ERR_PASS2, size);
 260     }
 261 }
 262
 263 /*
 264  * This routine wrappers the real output format's output routine,
 265  * in order to pass a copy of the data off to the listing file
 266  * generator at the same time.
 267  */
 268 static void out(int64_t offset, int32_t segto, const void *data,
 269                 enum out_type type, uint64_t size,
 270                 int32_t segment, int32_t wrt)
 271 {
 272     static int32_t lineno = 0;     /* static!!! */
 273     static char *lnfname = NULL;
 274     uint8_t p[8];
 275
 276     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 277         /*
 278          * This is a non-relocated address, and we're going to
 279          * convert it into RAWDATA format.
 280          */
 281         uint8_t *q = p;
 282
 283         if (size > 8) {
 284             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 285             return;
 286         }
 287
 288         WRITEADDR(q, *(int64_t *)data, size);
 289         data = p;
 290         type = OUT_RAWDATA;
 291     }
 292
 293     list->output(offset, data, type, size);
 294
 295     /*
 296      * this call to src_get determines when we call the
 297      * debug-format-specific "linenum" function
 298      * it updates lineno and lnfname to the current values
 299      * returning 0 if "same as last time", -2 if lnfname
 300      * changed, and the amount by which lineno changed,
 301      * if it did. thus, these variables must be static
 302      */
 303
 304     if (src_get(&lineno, &lnfname)) {
 305         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 306     }
 307
 308     outfmt->output(segto, data, type, size, segment, wrt);
 309 }
 310
 311 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 312                      insn * ins, const uint8_t *code)
 313 {
 314     int64_t isize;
 315     uint8_t c = code[0];
 316
 317     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 318         return false;
 319     if (!optimizing)
 320         return false;
 321     if (optimizing < 0 && c == 0371)
 322         return false;
 323
 324     isize = calcsize(segment, offset, bits, ins, code);
 325
 326     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 327         /* Be optimistic in pass 1 */
 328         return true;
 329
 330     if (ins->oprs[0].segment != segment)
 331         return false;
 332
 333     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 334     return (isize >= -128 && isize <= 127); /* is it byte size? */
 335 }
 336
 337 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 338                  insn * instruction, struct ofmt *output, efunc error,
 339                  ListGen * listgen)
 340 {
 341     const struct itemplate *temp;
 342     int j;
 343     enum match_result m;
 344     int64_t insn_end;
 345     int32_t itimes;
 346     int64_t start = offset;
 347     int64_t wsize = 0;             /* size for DB etc. */
 348
 349     errfunc = error;            /* to pass to other functions */
 350     cpu = cp;
 351     outfmt = output;            /* likewise */
 352     list = listgen;             /* and again */
 353
 354     switch (instruction->opcode) {
 355     case -1:
 356         return 0;
 357     case I_DB:
 358         wsize = 1;
 359         break;
 360     case I_DW:
 361         wsize = 2;
 362         break;
 363     case I_DD:
 364         wsize = 4;
 365         break;
 366     case I_DQ:
 367         wsize = 8;
 368         break;
 369     case I_DT:
 370         wsize = 10;
 371         break;
 372     case I_DO:
 373         wsize = 16;
 374         break;
 375     case I_DY:
 376         wsize = 32;
 377         break;
 378     default:
 379         break;
 380     }
 381
 382     if (wsize) {
 383         extop *e;
 384         int32_t t = instruction->times;
 385         if (t < 0)
 386             errfunc(ERR_PANIC,
 387                     "instruction->times < 0 (%ld) in assemble()", t);
 388
 389         while (t--) {           /* repeat TIMES times */
 390             list_for_each(e, instruction->eops) {
 391                 if (e->type == EOT_DB_NUMBER) {
 392                     if (wsize == 1) {
 393                         if (e->segment != NO_SEG)
 394                             errfunc(ERR_NONFATAL,
 395                                     "one-byte relocation attempted");
 396                         else {
 397                             uint8_t out_byte = e->offset;
 398                             out(offset, segment, &out_byte,
 399                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 400                         }
 401                     } else if (wsize > 8) {
 402                         errfunc(ERR_NONFATAL,
 403                                 "integer supplied to a DT, DO or DY"
 404                                 " instruction");
 405                     } else
 406                         out(offset, segment, &e->offset,
 407                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 408                     offset += wsize;
 409                 } else if (e->type == EOT_DB_STRING ||
 410                            e->type == EOT_DB_STRING_FREE) {
 411                     int align;
 412
 413                     out(offset, segment, e->stringval,
 414                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 415                     align = e->stringlen % wsize;
 416
 417                     if (align) {
 418                         align = wsize - align;
 419                         out(offset, segment, zero_buffer,
 420                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 421                     }
 422                     offset += e->stringlen + align;
 423                 }
 424             }
 425             if (t > 0 && t == instruction->times - 1) {
 426                 /*
 427                  * Dummy call to list->output to give the offset to the
 428                  * listing module.
 429                  */
 430                 list->output(offset, NULL, OUT_RAWDATA, 0);
 431                 list->uplevel(LIST_TIMES);
 432             }
 433         }
 434         if (instruction->times > 1)
 435             list->downlevel(LIST_TIMES);
 436         return offset - start;
 437     }
 438
 439     if (instruction->opcode == I_INCBIN) {
 440         const char *fname = instruction->eops->stringval;
 441         FILE *fp;
 442
 443         fp = fopen(fname, "rb");
 444         if (!fp) {
 445             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 446                   fname);
 447         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 448             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 449                   fname);
 450         } else {
 451             static char buf[4096];
 452             size_t t = instruction->times;
 453             size_t base = 0;
 454             size_t len;
 455
 456             len = ftell(fp);
 457             if (instruction->eops->next) {
 458                 base = instruction->eops->next->offset;
 459                 len -= base;
 460                 if (instruction->eops->next->next &&
 461                     len > (size_t)instruction->eops->next->next->offset)
 462                     len = (size_t)instruction->eops->next->next->offset;
 463             }
 464             /*
 465              * Dummy call to list->output to give the offset to the
 466              * listing module.
 467              */
 468             list->output(offset, NULL, OUT_RAWDATA, 0);
 469             list->uplevel(LIST_INCBIN);
 470             while (t--) {
 471                 size_t l;
 472
 473                 fseek(fp, base, SEEK_SET);
 474                 l = len;
 475                 while (l > 0) {
 476                     int32_t m;
 477                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 478                     if (!m) {
 479                         /*
 480                          * This shouldn't happen unless the file
 481                          * actually changes while we are reading
 482                          * it.
 483                          */
 484                         error(ERR_NONFATAL,
 485                               "`incbin': unexpected EOF while"
 486                               " reading file `%s'", fname);
 487                         t = 0;  /* Try to exit cleanly */
 488                         break;
 489                     }
 490                     out(offset, segment, buf, OUT_RAWDATA, m,
 491                         NO_SEG, NO_SEG);
 492                     l -= m;
 493                 }
 494             }
 495             list->downlevel(LIST_INCBIN);
 496             if (instruction->times > 1) {
 497                 /*
 498                  * Dummy call to list->output to give the offset to the
 499                  * listing module.
 500                  */
 501                 list->output(offset, NULL, OUT_RAWDATA, 0);
 502                 list->uplevel(LIST_TIMES);
 503                 list->downlevel(LIST_TIMES);
 504             }
 505             fclose(fp);
 506             return instruction->times * len;
 507         }
 508         return 0;               /* if we're here, there's an error */
 509     }
 510
 511     /* Check to see if we need an address-size prefix */
 512     add_asp(instruction, bits);
 513
 514     m = find_match(&temp, instruction, segment, offset, bits);
 515
 516     if (m == MOK_GOOD) {
 517         /* Matches! */
 518         int64_t insn_size = calcsize(segment, offset, bits,
 519                                      instruction, temp->code);
 520         itimes = instruction->times;
 521         if (insn_size < 0)  /* shouldn't be, on pass two */
 522             error(ERR_PANIC, "errors made it through from pass one");
 523         else
 524             while (itimes--) {
 525                 for (j = 0; j < MAXPREFIX; j++) {
 526                     uint8_t c = 0;
 527                     switch (instruction->prefixes[j]) {
 528                     case P_WAIT:
 529                         c = 0x9B;
 530                         break;
 531                     case P_LOCK:
 532                         c = 0xF0;
 533                         break;
 534                     case P_REPNE:
 535                     case P_REPNZ:
 536                         c = 0xF2;
 537                         break;
 538                     case P_REPE:
 539                     case P_REPZ:
 540                     case P_REP:
 541                         c = 0xF3;
 542                         break;
 543                     case R_CS:
 544                         if (bits == 64) {
 545                             error(ERR_WARNING | ERR_PASS2,
 546                                   "cs segment base generated, but will be ignored in 64-bit mode");
 547                         }
 548                         c = 0x2E;
 549                         break;
 550                     case R_DS:
 551                         if (bits == 64) {
 552                             error(ERR_WARNING | ERR_PASS2,
 553                                   "ds segment base generated, but will be ignored in 64-bit mode");
 554                         }
 555                         c = 0x3E;
 556                         break;
 557                     case R_ES:
 558                         if (bits == 64) {
 559                             error(ERR_WARNING | ERR_PASS2,
 560                                   "es segment base generated, but will be ignored in 64-bit mode");
 561                         }
 562                         c = 0x26;
 563                         break;
 564                     case R_FS:
 565                         c = 0x64;
 566                         break;
 567                     case R_GS:
 568                         c = 0x65;
 569                         break;
 570                     case R_SS:
 571                         if (bits == 64) {
 572                             error(ERR_WARNING | ERR_PASS2,
 573                                   "ss segment base generated, but will be ignored in 64-bit mode");
 574                         }
 575                         c = 0x36;
 576                         break;
 577                     case R_SEGR6:
 578                     case R_SEGR7:
 579                         error(ERR_NONFATAL,
 580                               "segr6 and segr7 cannot be used as prefixes");
 581                         break;
 582                     case P_A16:
 583                         if (bits == 64) {
 584                             error(ERR_NONFATAL,
 585                                   "16-bit addressing is not supported "
 586                                   "in 64-bit mode");
 587                         } else if (bits != 16)
 588                             c = 0x67;
 589                         break;
 590                     case P_A32:
 591                         if (bits != 32)
 592                             c = 0x67;
 593                         break;
 594                     case P_A64:
 595                         if (bits != 64) {
 596                             error(ERR_NONFATAL,
 597                                   "64-bit addressing is only supported "
 598                                   "in 64-bit mode");
 599                         }
 600                         break;
 601                     case P_ASP:
 602                         c = 0x67;
 603                         break;
 604                     case P_O16:
 605                         if (bits != 16)
 606                             c = 0x66;
 607                         break;
 608                     case P_O32:
 609                         if (bits == 16)
 610                             c = 0x66;
 611                         break;
 612                     case P_O64:
 613                         /* REX.W */
 614                         break;
 615                     case P_OSP:
 616                         c = 0x66;
 617                         break;
 618                     case P_none:
 619                         break;
 620                     default:
 621                         error(ERR_PANIC, "invalid instruction prefix");
 622                     }
 623                     if (c != 0) {
 624                         out(offset, segment, &c, OUT_RAWDATA, 1,
 625                             NO_SEG, NO_SEG);
 626                         offset++;
 627                     }
 628                 }
 629                 insn_end = offset + insn_size;
 630                 gencode(segment, offset, bits, instruction,
 631                         temp, insn_end);
 632                 offset += insn_size;
 633                 if (itimes > 0 && itimes == instruction->times - 1) {
 634                     /*
 635                      * Dummy call to list->output to give the offset to the
 636                      * listing module.
 637                      */
 638                     list->output(offset, NULL, OUT_RAWDATA, 0);
 639                     list->uplevel(LIST_TIMES);
 640                 }
 641             }
 642         if (instruction->times > 1)
 643             list->downlevel(LIST_TIMES);
 644         return offset - start;
 645     } else {
 646         /* No match */
 647         switch (m) {
 648         case MERR_OPSIZEMISSING:
 649             error(ERR_NONFATAL, "operation size not specified");
 650             break;
 651         case MERR_OPSIZEMISMATCH:
 652             error(ERR_NONFATAL, "mismatch in operand sizes");
 653             break;
 654         case MERR_BADCPU:
 655             error(ERR_NONFATAL, "no instruction for this cpu level");
 656             break;
 657         case MERR_BADMODE:
 658             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 659                   bits);
 660             break;
 661         default:
 662             error(ERR_NONFATAL,
 663                   "invalid combination of opcode and operands");
 664             break;
 665         }
 666     }
 667     return 0;
 668 }
 669
 670 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 671                   insn * instruction, efunc error)
 672 {
 673     const struct itemplate *temp;
 674     enum match_result m;
 675
 676     errfunc = error;            /* to pass to other functions */
 677     cpu = cp;
 678
 679     if (instruction->opcode == I_none)
 680         return 0;
 681
 682     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 683         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 684         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 685         instruction->opcode == I_DY) {
 686         extop *e;
 687         int32_t isize, osize, wsize = 0;   /* placate gcc */
 688
 689         isize = 0;
 690         switch (instruction->opcode) {
 691         case I_DB:
 692             wsize = 1;
 693             break;
 694         case I_DW:
 695             wsize = 2;
 696             break;
 697         case I_DD:
 698             wsize = 4;
 699             break;
 700         case I_DQ:
 701             wsize = 8;
 702             break;
 703         case I_DT:
 704             wsize = 10;
 705             break;
 706         case I_DO:
 707             wsize = 16;
 708             break;
 709         case I_DY:
 710             wsize = 32;
 711             break;
 712         default:
 713             break;
 714         }
 715
 716         list_for_each(e, instruction->eops) {
 717             int32_t align;
 718
 719             osize = 0;
 720             if (e->type == EOT_DB_NUMBER) {
 721                 osize = 1;
 722                 warn_overflow_const(e->offset, wsize);
 723             } else if (e->type == EOT_DB_STRING ||
 724                        e->type == EOT_DB_STRING_FREE)
 725                 osize = e->stringlen;
 726
 727             align = (-osize) % wsize;
 728             if (align < 0)
 729                 align += wsize;
 730             isize += osize + align;
 731         }
 732         return isize * instruction->times;
 733     }
 734
 735     if (instruction->opcode == I_INCBIN) {
 736         const char *fname = instruction->eops->stringval;
 737         FILE *fp;
 738         size_t len;
 739
 740         fp = fopen(fname, "rb");
 741         if (!fp)
 742             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 743                   fname);
 744         else if (fseek(fp, 0L, SEEK_END) < 0)
 745             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 746                   fname);
 747         else {
 748             len = ftell(fp);
 749             fclose(fp);
 750             if (instruction->eops->next) {
 751                 len -= instruction->eops->next->offset;
 752                 if (instruction->eops->next->next &&
 753                     len > (size_t)instruction->eops->next->next->offset) {
 754                     len = (size_t)instruction->eops->next->next->offset;
 755                 }
 756             }
 757             return instruction->times * len;
 758         }
 759         return 0;               /* if we're here, there's an error */
 760     }
 761
 762     /* Check to see if we need an address-size prefix */
 763     add_asp(instruction, bits);
 764
 765     m = find_match(&temp, instruction, segment, offset, bits);
 766     if (m == MOK_GOOD) {
 767         /* we've matched an instruction. */
 768         int64_t isize;
 769         const uint8_t *codes = temp->code;
 770         int j;
 771
 772         isize = calcsize(segment, offset, bits, instruction, codes);
 773         if (isize < 0)
 774             return -1;
 775         for (j = 0; j < MAXPREFIX; j++) {
 776             switch (instruction->prefixes[j]) {
 777             case P_A16:
 778                 if (bits != 16)
 779                     isize++;
 780                 break;
 781             case P_A32:
 782                 if (bits != 32)
 783                     isize++;
 784                 break;
 785             case P_O16:
 786                 if (bits != 16)
 787                     isize++;
 788                 break;
 789             case P_O32:
 790                 if (bits == 16)
 791                     isize++;
 792                 break;
 793             case P_A64:
 794             case P_O64:
 795             case P_none:
 796                 break;
 797             default:
 798                 isize++;
 799                 break;
 800             }
 801         }
 802         return isize * instruction->times;
 803     } else {
 804         return -1;                  /* didn't match any instruction */
 805     }
 806 }
 807
 808 static bool possible_sbyte(operand *o)
 809 {
 810     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 811         !(o->opflags & OPFLAG_UNKNOWN) &&
 812         optimizing >= 0 && !(o->type & STRICT);
 813 }
 814
 815 /* check that opn[op]  is a signed byte of size 16 or 32 */
 816 static bool is_sbyte16(operand *o)
 817 {
 818     int16_t v;
 819
 820     if (!possible_sbyte(o))
 821         return false;
 822
 823     v = o->offset;
 824     return v >= -128 && v <= 127;
 825 }
 826
 827 static bool is_sbyte32(operand *o)
 828 {
 829     int32_t v;
 830
 831     if (!possible_sbyte(o))
 832         return false;
 833
 834     v = o->offset;
 835     return v >= -128 && v <= 127;
 836 }
 837
/*
 * Common construct: expands to four consecutive case labels --
 * (x), (x)+1, (x)+2 and (x)+3 -- so that a single switch arm can cover
 * a group of four adjacent codes.  Use it as `case4(010):'; the macro
 * supplies the leading `case' keyword and the caller supplies the
 * final colon.
 */
#define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 840
 841 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 842                         insn * ins, const uint8_t *codes)
 843 {
 844     int64_t length = 0;
 845     uint8_t c;
 846     int rex_mask = ~0;
 847     int op1, op2;
 848     struct operand *opx;
 849     uint8_t opex = 0;
 850
 851     ins->rex = 0;               /* Ensure REX is reset */
 852
 853     if (ins->prefixes[PPS_OSIZE] == P_O64)
 854         ins->rex |= REX_W;
 855
 856     (void)segment;              /* Don't warn that this parameter is unused */
 857     (void)offset;               /* Don't warn that this parameter is unused */
 858
 859     while (*codes) {
 860         c = *codes++;
 861         op1 = (c & 3) + ((opex & 1) << 2);
 862         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 863         opx = &ins->oprs[op1];
 864         opex = 0;               /* For the next iteration */
 865
 866         switch (c) {
 867         case 01:
 868         case 02:
 869         case 03:
 870         case 04:
 871             codes += c, length += c;
 872             break;
 873
 874         case 05:
 875         case 06:
 876         case 07:
 877             opex = c;
 878             break;
 879
 880         case4(010):
 881             ins->rex |=
 882                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 883             codes++, length++;
 884             break;
 885
 886         case4(014):
 887         case4(020):
 888         case4(024):
 889             length++;
 890             break;
 891
 892         case4(030):
 893             length += 2;
 894             break;
 895
 896         case4(034):
 897             if (opx->type & (BITS16 | BITS32 | BITS64))
 898                 length += (opx->type & BITS16) ? 2 : 4;
 899             else
 900                 length += (bits == 16) ? 2 : 4;
 901             break;
 902
 903         case4(040):
 904             length += 4;
 905             break;
 906
 907         case4(044):
 908             length += ins->addr_size >> 3;
 909             break;
 910
 911         case4(050):
 912             length++;
 913             break;
 914
 915         case4(054):
 916             length += 8; /* MOV reg64/imm */
 917             break;
 918
 919         case4(060):
 920             length += 2;
 921             break;
 922
 923         case4(064):
 924             if (opx->type & (BITS16 | BITS32 | BITS64))
 925                 length += (opx->type & BITS16) ? 2 : 4;
 926             else
 927                 length += (bits == 16) ? 2 : 4;
 928             break;
 929
 930         case4(070):
 931             length += 4;
 932             break;
 933
 934         case4(074):
 935             length += 2;
 936             break;
 937
 938         case4(0140):
 939             length += is_sbyte16(opx) ? 1 : 2;
 940             break;
 941
 942         case4(0144):
 943             codes++;
 944             length++;
 945             break;
 946
 947         case4(0150):
 948             length += is_sbyte32(opx) ? 1 : 4;
 949             break;
 950
 951         case4(0154):
 952             codes++;
 953             length++;
 954             break;
 955
 956         case4(0160):
 957             length++;
 958             ins->rex |= REX_D;
 959             ins->drexdst = regval(opx);
 960             break;
 961
 962         case4(0164):
 963             length++;
 964             ins->rex |= REX_D|REX_OC;
 965             ins->drexdst = regval(opx);
 966             break;
 967
 968         case 0171:
 969             break;
 970
 971         case 0172:
 972         case 0173:
 973         case 0174:
 974             codes++;
 975             length++;
 976             break;
 977
 978         case4(0250):
 979             length += is_sbyte32(opx) ? 1 : 4;
 980             break;
 981
 982         case4(0254):
 983             length += 4;
 984             break;
 985
 986         case4(0260):
 987             ins->rex |= REX_V;
 988             ins->drexdst = regval(opx);
 989             ins->vex_cm = *codes++;
 990             ins->vex_wlp = *codes++;
 991             break;
 992
 993         case 0270:
 994             ins->rex |= REX_V;
 995             ins->drexdst = 0;
 996             ins->vex_cm = *codes++;
 997             ins->vex_wlp = *codes++;
 998             break;
 999
1000         case4(0274):
1001             length++;
1002             break;
1003
1004         case4(0300):
1005             break;
1006
1007         case 0310:
1008             if (bits == 64)
1009                 return -1;
1010             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
1011             break;
1012
1013         case 0311:
1014             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
1015             break;
1016
1017         case 0312:
1018             break;
1019
1020         case 0313:
1021             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
1022                 has_prefix(ins, PPS_ASIZE, P_A32))
1023                 return -1;
1024             break;
1025
1026         case4(0314):
1027             break;
1028
1029         case 0320:
1030             length += (bits != 16);
1031             break;
1032
1033         case 0321:
1034             length += (bits == 16);
1035             break;
1036
1037         case 0322:
1038             break;
1039
1040         case 0323:
1041             rex_mask &= ~REX_W;
1042             break;
1043
1044         case 0324:
1045             ins->rex |= REX_W;
1046             break;
1047
1048         case 0325:
1049             ins->rex |= REX_NH;
1050             break;
1051
1052         case 0330:
1053             codes++, length++;
1054             break;
1055
1056         case 0331:
1057             break;
1058
1059         case 0332:
1060         case 0333:
1061             length++;
1062             break;
1063
1064         case 0334:
1065             ins->rex |= REX_L;
1066             break;
1067
1068         case 0335:
1069             break;
1070
1071         case 0336:
1072             if (!ins->prefixes[PPS_LREP])
1073                 ins->prefixes[PPS_LREP] = P_REP;
1074             break;
1075
1076         case 0337:
1077             if (!ins->prefixes[PPS_LREP])
1078                 ins->prefixes[PPS_LREP] = P_REPNE;
1079             break;
1080
1081         case 0340:
1082             if (ins->oprs[0].segment != NO_SEG)
1083                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1084                         " quantity of BSS space");
1085             else
1086                 length += ins->oprs[0].offset;
1087             break;
1088
1089         case 0341:
1090             if (!ins->prefixes[PPS_WAIT])
1091                 ins->prefixes[PPS_WAIT] = P_WAIT;
1092             break;
1093
1094         case4(0344):
1095             length++;
1096             break;
1097
1098         case 0360:
1099             break;
1100
1101         case 0361:
1102         case 0362:
1103         case 0363:
1104             length++;
1105             break;
1106
1107         case 0364:
1108         case 0365:
1109             break;
1110
1111         case 0366:
1112         case 0367:
1113             length++;
1114             break;
1115
1116         case 0370:
1117         case 0371:
1118         case 0372:
1119             break;
1120
1121         case 0373:
1122             length++;
1123             break;
1124
1125         case4(0100):
1126         case4(0110):
1127         case4(0120):
1128         case4(0130):
1129         case4(0200):
1130         case4(0204):
1131         case4(0210):
1132         case4(0214):
1133         case4(0220):
1134         case4(0224):
1135         case4(0230):
1136         case4(0234):
1137             {
1138                 ea ea_data;
1139                 int rfield;
1140                 opflags_t rflags;
1141                 struct operand *opy = &ins->oprs[op2];
1142
1143                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1144
1145                 if (c <= 0177) {
1146                     /* pick rfield from operand b (opx) */
1147                     rflags = regflag(opx);
1148                     rfield = nasm_regvals[opx->basereg];
1149                 } else {
1150                     rflags = 0;
1151                     rfield = c & 7;
1152                 }
1153                 if (!process_ea(opy, &ea_data, bits,
1154                                 ins->addr_size, rfield, rflags)) {
1155                     errfunc(ERR_NONFATAL, "invalid effective address");
1156                     return -1;
1157                 } else {
1158                     ins->rex |= ea_data.rex;
1159                     length += ea_data.size;
1160                 }
1161             }
1162             break;
1163
1164         default:
1165             errfunc(ERR_PANIC, "internal instruction table corrupt"
1166                     ": instruction code \\%o (0x%02X) given", c, c);
1167             break;
1168         }
1169     }
1170
1171     ins->rex &= rex_mask;
1172
1173     if (ins->rex & REX_NH) {
1174         if (ins->rex & REX_H) {
1175             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1176             return -1;
1177         }
1178         ins->rex &= ~REX_P;     /* Don't force REX prefix due to high reg */
1179     }
1180
1181     if (ins->rex & REX_V) {
1182         int bad32 = REX_R|REX_W|REX_X|REX_B;
1183
1184         if (ins->rex & REX_H) {
1185             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1186             return -1;
1187         }
1188         switch (ins->vex_wlp & 030) {
1189         case 000:
1190         case 020:
1191             ins->rex &= ~REX_W;
1192             break;
1193         case 010:
1194             ins->rex |= REX_W;
1195             bad32 &= ~REX_W;
1196             break;
1197         case 030:
1198             /* Follow REX_W */
1199             break;
1200         }
1201
1202         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1203             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1204             return -1;
1205         }
1206         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1207             length += 3;
1208         else
1209             length += 2;
1210     } else if (ins->rex & REX_D) {
1211         if (ins->rex & REX_H) {
1212             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1213             return -1;
1214         }
1215         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1216                            ins->drexdst > 7)) {
1217             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1218             return -1;
1219         }
1220         length++;
1221     } else if (ins->rex & REX_REAL) {
1222         if (ins->rex & REX_H) {
1223             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1224             return -1;
1225         } else if (bits == 64) {
1226             length++;
1227         } else if ((ins->rex & REX_L) &&
1228                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1229                    cpu >= IF_X86_64) {
1230             /* LOCK-as-REX.R */
1231             assert_no_prefix(ins, PPS_LREP);
1232             length++;
1233         } else {
1234             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1235             return -1;
1236         }
1237     }
1238
1239     return length;
1240 }
1241
/*
 * Emit the REX prefix byte, if one is pending.
 *
 * Expands in a scope that provides `ins', `bits', `offset' and
 * `segment'.  A byte is written only when the instruction is not a
 * DREX/VEX one, at least one real REX bit is set, and we are in
 * 64-bit mode.  After emitting, ins->rex is cleared so a later
 * EMIT_REX() in the same instruction does nothing, and `offset' is
 * advanced by one.
 *
 * Wrapped in do { } while (0) so that `EMIT_REX();' is exactly one
 * statement and is safe inside an unbraced if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) &&                              \
            (ins->rex & REX_REAL) && (bits == 64)) {                    \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1,             \
                NO_SEG, NO_SEG);                                        \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1249
1250 static void gencode(int32_t segment, int64_t offset, int bits,
1251                     insn * ins, const struct itemplate *temp,
1252                     int64_t insn_end)
1253 {
1254     static char condval[] = {   /* conditional opcodes */
1255         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1256         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1257         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1258     };
1259     uint8_t c;
1260     uint8_t bytes[4];
1261     int64_t size;
1262     int64_t data;
1263     int op1, op2;
1264     struct operand *opx;
1265     const uint8_t *codes = temp->code;
1266     uint8_t opex = 0;
1267
1268     while (*codes) {
1269         c = *codes++;
1270         op1 = (c & 3) + ((opex & 1) << 2);
1271         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1272         opx = &ins->oprs[op1];
1273         opex = 0;               /* For the next iteration */
1274
1275         switch (c) {
1276         case 01:
1277         case 02:
1278         case 03:
1279         case 04:
1280             EMIT_REX();
1281             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1282             codes += c;
1283             offset += c;
1284             break;
1285
1286         case 05:
1287         case 06:
1288         case 07:
1289             opex = c;
1290             break;
1291
1292         case4(010):
1293             EMIT_REX();
1294             bytes[0] = *codes++ + (regval(opx) & 7);
1295             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1296             offset += 1;
1297             break;
1298
1299         case4(014):
1300             /* The test for BITS8 and SBYTE here is intended to avoid
1301                warning on optimizer actions due to SBYTE, while still
1302                warn on explicit BYTE directives.  Also warn, obviously,
1303                if the optimizer isn't enabled. */
1304             if (((opx->type & BITS8) ||
1305                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1306                 (opx->offset < -128 || opx->offset > 127)) {
1307                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1308                         "signed byte value exceeds bounds");
1309             }
1310             if (opx->segment != NO_SEG) {
1311                 data = opx->offset;
1312                 out(offset, segment, &data, OUT_ADDRESS, 1,
1313                     opx->segment, opx->wrt);
1314             } else {
1315                 bytes[0] = opx->offset;
1316                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1317                     NO_SEG);
1318             }
1319             offset += 1;
1320             break;
1321
1322         case4(020):
1323             if (opx->offset < -256 || opx->offset > 255) {
1324                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1325                         "byte value exceeds bounds");
1326             }
1327             if (opx->segment != NO_SEG) {
1328                 data = opx->offset;
1329                 out(offset, segment, &data, OUT_ADDRESS, 1,
1330                     opx->segment, opx->wrt);
1331             } else {
1332                 bytes[0] = opx->offset;
1333                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1334                     NO_SEG);
1335             }
1336             offset += 1;
1337             break;
1338
1339         case4(024):
1340             if (opx->offset < 0 || opx->offset > 255)
1341                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1342                         "unsigned byte value exceeds bounds");
1343             if (opx->segment != NO_SEG) {
1344                 data = opx->offset;
1345                 out(offset, segment, &data, OUT_ADDRESS, 1,
1346                     opx->segment, opx->wrt);
1347             } else {
1348                 bytes[0] = opx->offset;
1349                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1350                     NO_SEG);
1351             }
1352             offset += 1;
1353             break;
1354
1355         case4(030):
1356             warn_overflow_opd(opx, 2);
1357             data = opx->offset;
1358             out(offset, segment, &data, OUT_ADDRESS, 2,
1359                 opx->segment, opx->wrt);
1360             offset += 2;
1361             break;
1362
1363         case4(034):
1364             if (opx->type & (BITS16 | BITS32))
1365                 size = (opx->type & BITS16) ? 2 : 4;
1366             else
1367                 size = (bits == 16) ? 2 : 4;
1368             warn_overflow_opd(opx, size);
1369             data = opx->offset;
1370             out(offset, segment, &data, OUT_ADDRESS, size,
1371                 opx->segment, opx->wrt);
1372             offset += size;
1373             break;
1374
1375         case4(040):
1376             warn_overflow_opd(opx, 4);
1377             data = opx->offset;
1378             out(offset, segment, &data, OUT_ADDRESS, 4,
1379                 opx->segment, opx->wrt);
1380             offset += 4;
1381             break;
1382
1383         case4(044):
1384             data = opx->offset;
1385             size = ins->addr_size >> 3;
1386             warn_overflow_opd(opx, size);
1387             out(offset, segment, &data, OUT_ADDRESS, size,
1388                 opx->segment, opx->wrt);
1389             offset += size;
1390             break;
1391
1392         case4(050):
1393             if (opx->segment != segment)
1394                 errfunc(ERR_NONFATAL,
1395                         "short relative jump outside segment");
1396             data = opx->offset - insn_end;
1397             if (data > 127 || data < -128)
1398                 errfunc(ERR_NONFATAL, "short jump is out of range");
1399             bytes[0] = data;
1400             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1401             offset += 1;
1402             break;
1403
1404         case4(054):
1405             data = (int64_t)opx->offset;
1406             out(offset, segment, &data, OUT_ADDRESS, 8,
1407                 opx->segment, opx->wrt);
1408             offset += 8;
1409             break;
1410
1411         case4(060):
1412             if (opx->segment != segment) {
1413                 data = opx->offset;
1414                 out(offset, segment, &data,
1415                     OUT_REL2ADR, insn_end - offset,
1416                     opx->segment, opx->wrt);
1417             } else {
1418                 data = opx->offset - insn_end;
1419                 out(offset, segment, &data,
1420                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1421             }
1422             offset += 2;
1423             break;
1424
1425         case4(064):
1426             if (opx->type & (BITS16 | BITS32 | BITS64))
1427                 size = (opx->type & BITS16) ? 2 : 4;
1428             else
1429                 size = (bits == 16) ? 2 : 4;
1430             if (opx->segment != segment) {
1431                 data = opx->offset;
1432                 out(offset, segment, &data,
1433                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1434                     insn_end - offset, opx->segment, opx->wrt);
1435             } else {
1436                 data = opx->offset - insn_end;
1437                 out(offset, segment, &data,
1438                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1439             }
1440             offset += size;
1441             break;
1442
1443         case4(070):
1444             if (opx->segment != segment) {
1445                 data = opx->offset;
1446                 out(offset, segment, &data,
1447                     OUT_REL4ADR, insn_end - offset,
1448                     opx->segment, opx->wrt);
1449             } else {
1450                 data = opx->offset - insn_end;
1451                 out(offset, segment, &data,
1452                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1453             }
1454             offset += 4;
1455             break;
1456
1457         case4(074):
1458             if (opx->segment == NO_SEG)
1459                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1460                         " relocatable");
1461             data = 0;
1462             out(offset, segment, &data, OUT_ADDRESS, 2,
1463                 outfmt->segbase(1 + opx->segment),
1464                 opx->wrt);
1465             offset += 2;
1466             break;
1467
1468         case4(0140):
1469             data = opx->offset;
1470             warn_overflow_opd(opx, 2);
1471             if (is_sbyte16(opx)) {
1472                 bytes[0] = data;
1473                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1474                     NO_SEG);
1475                 offset++;
1476             } else {
1477                 out(offset, segment, &data, OUT_ADDRESS, 2,
1478                     opx->segment, opx->wrt);
1479                 offset += 2;
1480             }
1481             break;
1482
1483         case4(0144):
1484             EMIT_REX();
1485             bytes[0] = *codes++;
1486             if (is_sbyte16(opx))
1487                 bytes[0] |= 2;  /* s-bit */
1488             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1489             offset++;
1490             break;
1491
1492         case4(0150):
1493             data = opx->offset;
1494             warn_overflow_opd(opx, 4);
1495             if (is_sbyte32(opx)) {
1496                 bytes[0] = data;
1497                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1498                     NO_SEG);
1499                 offset++;
1500             } else {
1501                 out(offset, segment, &data, OUT_ADDRESS, 4,
1502                     opx->segment, opx->wrt);
1503                 offset += 4;
1504             }
1505             break;
1506
1507         case4(0154):
1508             EMIT_REX();
1509             bytes[0] = *codes++;
1510             if (is_sbyte32(opx))
1511                 bytes[0] |= 2;  /* s-bit */
1512             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1513             offset++;
1514             break;
1515
1516         case4(0160):
1517         case4(0164):
1518             break;
1519
1520         case 0171:
1521             bytes[0] =
1522                 (ins->drexdst << 4) |
1523                 (ins->rex & REX_OC ? 0x08 : 0) |
1524                 (ins->rex & (REX_R|REX_X|REX_B));
1525             ins->rex = 0;
1526             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1527             offset++;
1528             break;
1529
1530         case 0172:
1531             c = *codes++;
1532             opx = &ins->oprs[c >> 3];
1533             bytes[0] = nasm_regvals[opx->basereg] << 4;
1534             opx = &ins->oprs[c & 7];
1535             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1536                 errfunc(ERR_NONFATAL,
1537                         "non-absolute expression not permitted as argument %d",
1538                         c & 7);
1539             } else {
1540                 if (opx->offset & ~15) {
1541                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1542                             "four-bit argument exceeds bounds");
1543                 }
1544                 bytes[0] |= opx->offset & 15;
1545             }
1546             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1547             offset++;
1548             break;
1549
1550         case 0173:
1551             c = *codes++;
1552             opx = &ins->oprs[c >> 4];
1553             bytes[0] = nasm_regvals[opx->basereg] << 4;
1554             bytes[0] |= c & 15;
1555             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1556             offset++;
1557             break;
1558
1559         case 0174:
1560             c = *codes++;
1561             opx = &ins->oprs[c];
1562             bytes[0] = nasm_regvals[opx->basereg] << 4;
1563             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1564             offset++;
1565             break;
1566
1567         case4(0250):
1568             data = opx->offset;
1569             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1570                 (int32_t)data != (int64_t)data) {
1571                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1572                         "signed dword immediate exceeds bounds");
1573             }
1574             if (is_sbyte32(opx)) {
1575                 bytes[0] = data;
1576                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1577                     NO_SEG);
1578                 offset++;
1579             } else {
1580                 out(offset, segment, &data, OUT_ADDRESS, 4,
1581                     opx->segment, opx->wrt);
1582                 offset += 4;
1583             }
1584             break;
1585
1586         case4(0254):
1587             data = opx->offset;
1588             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1589                 (int32_t)data != (int64_t)data) {
1590                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1591                         "signed dword immediate exceeds bounds");
1592             }
1593             out(offset, segment, &data, OUT_ADDRESS, 4,
1594                 opx->segment, opx->wrt);
1595             offset += 4;
1596             break;
1597
1598         case4(0260):
1599         case 0270:
1600             codes += 2;
1601             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1602                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1603                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1604                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1605                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1606                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1607                 offset += 3;
1608             } else {
1609                 bytes[0] = 0xc5;
1610                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1611                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1612                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1613                 offset += 2;
1614             }
1615             break;
1616
1617         case4(0274):
1618         {
1619             uint64_t uv, um;
1620             int s;
1621
1622             if (ins->rex & REX_W)
1623                 s = 64;
1624             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1625                 s = 16;
1626             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1627                 s = 32;
1628             else
1629                 s = bits;
1630
1631             um = (uint64_t)2 << (s-1);
1632             uv = opx->offset;
1633
1634             if (uv > 127 && uv < (uint64_t)-128 &&
1635                 (uv < um-128 || uv > um-1)) {
1636                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1637                         "signed byte value exceeds bounds");
1638             }
1639             if (opx->segment != NO_SEG) {
1640                 data = uv;
1641                 out(offset, segment, &data, OUT_ADDRESS, 1,
1642                     opx->segment, opx->wrt);
1643             } else {
1644                 bytes[0] = uv;
1645                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1646                     NO_SEG);
1647             }
1648             offset += 1;
1649             break;
1650         }
1651
1652         case4(0300):
1653             break;
1654
1655         case 0310:
1656             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1657                 *bytes = 0x67;
1658                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1659                 offset += 1;
1660             } else
1661                 offset += 0;
1662             break;
1663
1664         case 0311:
1665             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1666                 *bytes = 0x67;
1667                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1668                 offset += 1;
1669             } else
1670                 offset += 0;
1671             break;
1672
1673         case 0312:
1674             break;
1675
1676         case 0313:
1677             ins->rex = 0;
1678             break;
1679
1680         case4(0314):
1681             break;
1682
1683         case 0320:
1684             if (bits != 16) {
1685                 *bytes = 0x66;
1686                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1687                 offset += 1;
1688             } else
1689                 offset += 0;
1690             break;
1691
1692         case 0321:
1693             if (bits == 16) {
1694                 *bytes = 0x66;
1695                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1696                 offset += 1;
1697             } else
1698                 offset += 0;
1699             break;
1700
1701         case 0322:
1702         case 0323:
1703             break;
1704
1705         case 0324:
1706             ins->rex |= REX_W;
1707             break;
1708
1709         case 0325:
1710             break;
1711
1712         case 0330:
1713             *bytes = *codes++ ^ condval[ins->condition];
1714             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1715             offset += 1;
1716             break;
1717
1718         case 0331:
1719             break;
1720
1721         case 0332:
1722         case 0333:
1723             *bytes = c - 0332 + 0xF2;
1724             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1725             offset += 1;
1726             break;
1727
1728         case 0334:
1729             if (ins->rex & REX_R) {
1730                 *bytes = 0xF0;
1731                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1732                 offset += 1;
1733             }
1734             ins->rex &= ~(REX_L|REX_R);
1735             break;
1736
1737         case 0335:
1738             break;
1739
1740         case 0336:
1741         case 0337:
1742             break;
1743
1744         case 0340:
1745             if (ins->oprs[0].segment != NO_SEG)
1746                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1747             else {
1748                 int64_t size = ins->oprs[0].offset;
1749                 if (size > 0)
1750                     out(offset, segment, NULL,
1751                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1752                 offset += size;
1753             }
1754             break;
1755
1756         case 0341:
1757             break;
1758
1759         case 0344:
1760         case 0345:
1761             bytes[0] = c & 1;
1762             switch (ins->oprs[0].basereg) {
1763             case R_CS:
1764                 bytes[0] += 0x0E;
1765                 break;
1766             case R_DS:
1767                 bytes[0] += 0x1E;
1768                 break;
1769             case R_ES:
1770                 bytes[0] += 0x06;
1771                 break;
1772             case R_SS:
1773                 bytes[0] += 0x16;
1774                 break;
1775             default:
1776                 errfunc(ERR_PANIC,
1777                         "bizarre 8086 segment register received");
1778             }
1779             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1780             offset++;
1781             break;
1782
1783         case 0346:
1784         case 0347:
1785             bytes[0] = c & 1;
1786             switch (ins->oprs[0].basereg) {
1787             case R_FS:
1788                 bytes[0] += 0xA0;
1789                 break;
1790             case R_GS:
1791                 bytes[0] += 0xA8;
1792                 break;
1793             default:
1794                 errfunc(ERR_PANIC,
1795                         "bizarre 386 segment register received");
1796             }
1797             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1798             offset++;
1799             break;
1800
1801         case 0360:
1802             break;
1803
1804         case 0361:
1805             bytes[0] = 0x66;
1806             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1807             offset += 1;
1808             break;
1809
1810         case 0362:
1811         case 0363:
1812             bytes[0] = c - 0362 + 0xf2;
1813             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1814             offset += 1;
1815             break;
1816
1817         case 0364:
1818         case 0365:
1819             break;
1820
1821         case 0366:
1822         case 0367:
1823             *bytes = c - 0366 + 0x66;
1824             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1825             offset += 1;
1826             break;
1827
1828         case 0370:
1829         case 0371:
1830         case 0372:
1831             break;
1832
1833         case 0373:
1834             *bytes = bits == 16 ? 3 : 5;
1835             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1836             offset += 1;
1837             break;
1838
1839         case4(0100):
1840         case4(0110):
1841         case4(0120):
1842         case4(0130):
1843         case4(0200):
1844         case4(0204):
1845         case4(0210):
1846         case4(0214):
1847         case4(0220):
1848         case4(0224):
1849         case4(0230):
1850         case4(0234):
1851             {
1852                 ea ea_data;
1853                 int rfield;
1854                 opflags_t rflags;
1855                 uint8_t *p;
1856                 int32_t s;
1857                 enum out_type type;
1858                 struct operand *opy = &ins->oprs[op2];
1859
1860                 if (c <= 0177) {
1861                     /* pick rfield from operand b (opx) */
1862                     rflags = regflag(opx);
1863                     rfield = nasm_regvals[opx->basereg];
1864                 } else {
1865                     /* rfield is constant */
1866                     rflags = 0;
1867                     rfield = c & 7;
1868                 }
1869
1870                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1871                                 rfield, rflags)) {
1872                     errfunc(ERR_NONFATAL, "invalid effective address");
1873                 }
1874
1875
1876                 p = bytes;
1877                 *p++ = ea_data.modrm;
1878                 if (ea_data.sib_present)
1879                     *p++ = ea_data.sib;
1880
1881                 /* DREX suffixes come between the SIB and the displacement */
1882                 if (ins->rex & REX_D) {
1883                     *p++ = (ins->drexdst << 4) |
1884                            (ins->rex & REX_OC ? 0x08 : 0) |
1885                            (ins->rex & (REX_R|REX_X|REX_B));
1886                     ins->rex = 0;
1887                 }
1888
1889                 s = p - bytes;
1890                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1891
1892                 /*
1893                  * Make sure the address gets the right offset in case
1894                  * the line breaks in the .lst file (BR 1197827)
1895                  */
1896                 offset += s;
1897                 s = 0;
1898
1899                 switch (ea_data.bytes) {
1900                 case 0:
1901                     break;
1902                 case 1:
1903                 case 2:
1904                 case 4:
1905                 case 8:
1906                     data = opy->offset;
1907                     warn_overflow_opd(opy, ea_data.bytes);
1908                     s += ea_data.bytes;
1909                     if (ea_data.rip) {
1910                         if (opy->segment == segment) {
1911                             data -= insn_end;
1912                             out(offset, segment, &data, OUT_ADDRESS,
1913                                 ea_data.bytes, NO_SEG, NO_SEG);
1914                         } else {
1915                             out(offset, segment, &data, OUT_REL4ADR,
1916                                 insn_end - offset, opy->segment, opy->wrt);
1917                         }
1918                     } else {
1919                         type = OUT_ADDRESS;
1920                         out(offset, segment, &data, OUT_ADDRESS,
1921                             ea_data.bytes, opy->segment, opy->wrt);
1922                     }
1923                     break;
1924                 default:
1925                     /* Impossible! */
1926                     errfunc(ERR_PANIC,
1927                             "Invalid amount of bytes (%d) for offset?!",
1928                             ea_data.bytes);
1929                     break;
1930                 }
1931                 offset += s;
1932             }
1933             break;
1934
1935         default:
1936             errfunc(ERR_PANIC, "internal instruction table corrupt"
1937                     ": instruction code \\%o (0x%02X) given", c, c);
1938             break;
1939         }
1940     }
1941 }
1942
1943 static opflags_t regflag(const operand * o)
1944 {
1945     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1946         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1947     }
1948     return nasm_reg_flags[o->basereg];
1949 }
1950
1951 static int32_t regval(const operand * o)
1952 {
1953     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1954         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1955     }
1956     return nasm_regvals[o->basereg];
1957 }
1958
1959 static int op_rexflags(const operand * o, int mask)
1960 {
1961     opflags_t flags;
1962     int val;
1963
1964     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1965         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1966     }
1967
1968     flags = nasm_reg_flags[o->basereg];
1969     val = nasm_regvals[o->basereg];
1970
1971     return rexflags(val, flags, mask);
1972 }
1973
1974 static int rexflags(int val, opflags_t flags, int mask)
1975 {
1976     int rex = 0;
1977
1978     if (val >= 8)
1979         rex |= REX_B|REX_X|REX_R;
1980     if (flags & BITS64)
1981         rex |= REX_W;
1982     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1983         rex |= REX_H;
1984     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1985         rex |= REX_P;
1986
1987     return rex & mask;
1988 }
1989
1990 static enum match_result find_match(const struct itemplate **tempp,
1991                                     insn *instruction,
1992                                     int32_t segment, int64_t offset, int bits)
1993 {
1994     const struct itemplate *temp;
1995     enum match_result m, merr;
1996     opflags_t xsizeflags[MAX_OPERANDS];
1997     bool opsizemissing = false;
1998     int i;
1999
2000     for (i = 0; i < instruction->operands; i++)
2001         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
2002
2003     merr = MERR_INVALOP;
2004
2005     for (temp = nasm_instructions[instruction->opcode];
2006          temp->opcode != I_none; temp++) {
2007         m = matches(temp, instruction, bits);
2008         if (m == MOK_JUMP) {
2009             if (jmp_match(segment, offset, bits, instruction, temp->code))
2010                 m = MOK_GOOD;
2011             else
2012                 m = MERR_INVALOP;
2013         } else if (m == MERR_OPSIZEMISSING &&
2014                    (temp->flags & IF_SMASK) != IF_SX) {
2015             /*
2016              * Missing operand size and a candidate for fuzzy matching...
2017              */
2018             for (i = 0; i < temp->operands; i++) {
2019                 if ((temp->opd[i] & SAME_AS) == 0)
2020                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
2021             }
2022             opsizemissing = true;
2023         }
2024         if (m > merr)
2025             merr = m;
2026         if (merr == MOK_GOOD)
2027             goto done;
2028     }
2029
2030     /* No match, but see if we can get a fuzzy operand size match... */
2031     if (!opsizemissing)
2032         goto done;
2033
2034     for (i = 0; i < instruction->operands; i++) {
2035         /*
2036          * We ignore extrinsic operand sizes on registers, so we should
2037          * never try to fuzzy-match on them.  This also resolves the case
2038          * when we have e.g. "xmmrm128" in two different positions.
2039          */
2040         if (is_class(REGISTER, instruction->oprs[i].type))
2041             continue;
2042
2043         /* This tests if xsizeflags[i] has more than one bit set */
2044         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2045             goto done;          /* No luck */
2046
2047         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2048     }
2049
2050     /* Try matching again... */
2051     for (temp = nasm_instructions[instruction->opcode];
2052          temp->opcode != I_none; temp++) {
2053         m = matches(temp, instruction, bits);
2054         if (m == MOK_JUMP) {
2055             if (jmp_match(segment, offset, bits, instruction, temp->code))
2056                 m = MOK_GOOD;
2057             else
2058                 m = MERR_INVALOP;
2059         }
2060         if (m > merr)
2061             merr = m;
2062         if (merr == MOK_GOOD)
2063             goto done;
2064     }
2065
2066 done:
2067     *tempp = temp;
2068     return merr;
2069 }
2070
2071 static enum match_result matches(const struct itemplate *itemp,
2072                                  insn *instruction, int bits)
2073 {
2074     int i, size[MAX_OPERANDS], asize, oprs;
2075     bool opsizemissing = false;
2076
2077     /*
2078      * Check the opcode
2079      */
2080     if (itemp->opcode != instruction->opcode)
2081         return MERR_INVALOP;
2082
2083     /*
2084      * Count the operands
2085      */
2086     if (itemp->operands != instruction->operands)
2087         return MERR_INVALOP;
2088
2089     /*
2090      * Check that no spurious colons or TOs are present
2091      */
2092     for (i = 0; i < itemp->operands; i++)
2093         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2094             return MERR_INVALOP;
2095
2096     /*
2097      * Process size flags
2098      */
2099     switch (itemp->flags & IF_SMASK) {
2100     case IF_SB:
2101         asize = BITS8;
2102         break;
2103     case IF_SW:
2104         asize = BITS16;
2105         break;
2106     case IF_SD:
2107         asize = BITS32;
2108         break;
2109     case IF_SQ:
2110         asize = BITS64;
2111         break;
2112     case IF_SO:
2113         asize = BITS128;
2114         break;
2115     case IF_SY:
2116         asize = BITS256;
2117         break;
2118     case IF_SZ:
2119         switch (bits) {
2120         case 16:
2121             asize = BITS16;
2122             break;
2123         case 32:
2124             asize = BITS32;
2125             break;
2126         case 64:
2127             asize = BITS64;
2128             break;
2129         default:
2130             asize = 0;
2131             break;
2132         }
2133         break;
2134     default:
2135         asize = 0;
2136         break;
2137     }
2138
2139     if (itemp->flags & IF_ARMASK) {
2140         /* S- flags only apply to a specific operand */
2141         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2142         memset(size, 0, sizeof size);
2143         size[i] = asize;
2144     } else {
2145         /* S- flags apply to all operands */
2146         for (i = 0; i < MAX_OPERANDS; i++)
2147             size[i] = asize;
2148     }
2149
2150     /*
2151      * Check that the operand flags all match up
2152      */
2153     for (i = 0; i < itemp->operands; i++) {
2154         opflags_t type = instruction->oprs[i].type;
2155         if (!(type & SIZE_MASK))
2156             type |= size[i];
2157
2158         if (itemp->opd[i] & SAME_AS) {
2159             int j = itemp->opd[i] & ~SAME_AS;
2160             if (type != instruction->oprs[j].type ||
2161                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2162                 return MERR_INVALOP;
2163         } else if (itemp->opd[i] & ~type ||
2164             ((itemp->opd[i] & SIZE_MASK) &&
2165              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2166             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2167                 return MERR_INVALOP;
2168             } else if (!is_class(REGISTER, type)) {
2169                 /*
2170                  * Note: we don't honor extrinsic operand sizes for registers,
2171                  * so "missing operand size" for a register should be
2172                  * considered a wildcard match rather than an error.
2173                  */
2174                 opsizemissing = true;
2175             }
2176         }
2177     }
2178
2179     if (opsizemissing)
2180         return MERR_OPSIZEMISSING;
2181
2182     /*
2183      * Check operand sizes
2184      */
2185     if (itemp->flags & (IF_SM | IF_SM2)) {
2186         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2187         asize = 0;
2188         for (i = 0; i < oprs; i++) {
2189             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2190                 int j;
2191                 for (j = 0; j < oprs; j++)
2192                     size[j] = asize;
2193                 break;
2194             }
2195         }
2196     } else {
2197         oprs = itemp->operands;
2198     }
2199
2200     for (i = 0; i < itemp->operands; i++) {
2201         if (!(itemp->opd[i] & SIZE_MASK) &&
2202             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2203             return MERR_OPSIZEMISMATCH;
2204     }
2205
2206     /*
2207      * Check template is okay at the set cpu level
2208      */
2209     if (((itemp->flags & IF_PLEVEL) > cpu))
2210         return MERR_BADCPU;
2211
2212     /*
2213      * Verify the appropriate long mode flag.
2214      */
2215     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2216         return MERR_BADMODE;
2217
2218     /*
2219      * Check if special handling needed for Jumps
2220      */
2221     if ((itemp->code[0] & 0374) == 0370)
2222         return MOK_JUMP;
2223
2224     return MOK_GOOD;
2225 }
2226
/*
 * Work out how the operand `input' is encoded as an effective address and
 * record the result in `*output'.
 *
 *   input    - the register or memory operand to encode
 *   output   - receives modrm, sib/sib_present, bytes (displacement byte
 *              count), rip and size.  REX bits are OR-ed into output->rex;
 *              this function never clears that field, so it keeps whatever
 *              value it held on entry.
 *   bits     - assembly mode; 64 selects the 64-bit pure-offset handling
 *   addrbits - address size in effect (16, 32 or 64)
 *   rfield   - value placed in bits 3..5 of the ModRM byte
 *   rflags   - register flags belonging to rfield, used only for REX bits
 *
 * Returns `output' on success, or NULL when the operand cannot be encoded
 * (in which case *output may have been partially written).
 */
static ea *process_ea(operand * input, ea * output, int bits,
                      int addrbits, int rfield, opflags_t rflags)
{
    /*
     * When OPFLAG_UNKNOWN is set, the mod 0 (no displacement) and mod 1
     * (one-byte displacement) forms below are never selected on the
     * strength of the offset value alone.
     */
    bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);

    output->rip = false;

    /* REX flags for the rfield operand */
    output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);

    if (is_class(REGISTER, input->type)) {  /* register direct */
        int i;
        opflags_t f;

        if (input->basereg < EXPR_REG_START /* Verify as Register */
            || input->basereg >= REG_ENUM_LIMIT)
            return NULL;
        f = regflag(input);
        i = nasm_regvals[input->basereg];

        if (REG_EA & ~f)
            return NULL;        /* Invalid EA register */

        output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);

        output->sib_present = false;             /* no SIB necessary */
        output->bytes = 0;  /* no offset necessary either */
        /* mod = 3 (0xC0), reg = rfield, r/m = low three bits of register */
        output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
    } else {                    /* it's a memory reference */
        if (input->basereg == -1
            && (input->indexreg == -1 || input->scale == 0)) {
            /* it's a pure offset */
            if (bits == 64 && (~input->type & IP_REL)) {
              /*
               * 64-bit mode without IP_REL: encode through a SIB byte
               * with index field 4 and base field 5, followed by a
               * 4-byte displacement.
               */
              int scale, index, base;
              output->sib_present = true;
              scale = 0;
              index = 4;
              base = 5;
              output->sib = (scale << 6) | (index << 3) | base;
              output->bytes = 4;
              output->modrm = 4 | ((rfield & 7) << 3);
              output->rip = false;
            } else {
              /*
               * r/m = 5 with a 4-byte offset, or r/m = 6 with a 2-byte
               * offset for 16-bit addressing; flagged as RIP-relative
               * when assembling in 64-bit mode.
               */
              output->sib_present = false;
              output->bytes = (addrbits != 16 ? 4 : 2);
              output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
              output->rip = bits == 64;
            }
        } else {                /* it's an indirection */
            int i = input->indexreg, b = input->basereg, s = input->scale;
            int32_t o = input->offset, seg = input->segment;
            int hb = input->hintbase, ht = input->hinttype;
            int t, it, bt;              /* register numbers */
            opflags_t x, ix, bx;        /* register flags */

            if (s == 0)
                i = -1;         /* make this easy, at least */

            /* it/ix: index register value and flags, or -1/0 if none */
            if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
                it = nasm_regvals[i];
                ix = nasm_reg_flags[i];
            } else {
                it = -1;
                ix = 0;
            }

            /* bt/bx: base register value and flags, or -1/0 if none */
            if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
                bt = nasm_regvals[b];
                bx = nasm_reg_flags[b];
            } else {
                bt = -1;
                bx = 0;
            }

            /* check for a 32/64-bit memory reference... */
            if ((ix|bx) & (BITS32|BITS64)) {
                /* it must be a 32/64-bit memory reference. Firstly we have
                 * to check that all registers involved are type E/Rxx. */
                int32_t sok = BITS32|BITS64;    /* address sizes still OK */

                if (it != -1) {
                    if (!(REG64 & ~ix) || !(REG32 & ~ix))
                        sok &= ix;
                    else
                        return NULL;
                }

                if (bt != -1) {
                    if (REG_GPR & ~bx)
                        return NULL; /* Invalid register */
                    if (~sok & bx & SIZE_MASK)
                        return NULL; /* Invalid size */
                    sok &= bx;
                }

                /* While we're here, ensure the user didn't specify
                   WORD or QWORD. */
                if (input->disp_size == 16 || input->disp_size == 64)
                    return NULL;

                /* the register size must be usable with addrbits */
                if (addrbits == 16 ||
                    (addrbits == 32 && !(sok & BITS32)) ||
                    (addrbits == 64 && !(sok & BITS64)))
                    return NULL;

                /* now reorganize base/index */
                if (s == 1 && bt != it && bt != -1 && it != -1 &&
                    ((hb == b && ht == EAH_NOTBASE)
                     || (hb == i && ht == EAH_MAKEBASE))) {
                    /* swap if hints say so */
                    t = bt, bt = it, it = t;
                    x = bx, bx = ix, ix = x;
                }
                if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
                    bt = -1, bx = 0, s++;
                if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
                    /* make single reg base, unless hint */
                    bt = it, bx = ix, it = -1, ix = 0;
                }
                if (((s == 2 && it != REG_NUM_ESP
                      && !(input->eaflags & EAF_TIMESTWO)) || s == 3
                     || s == 5 || s == 9) && bt == -1)
                    bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
                if (it == -1 && (bt & 7) != REG_NUM_ESP
                    && (input->eaflags & EAF_TIMESTWO))
                    it = bt, ix = bx, bt = -1, bx = 0, s = 1;
                /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
                if (s == 1 && it == REG_NUM_ESP) {
                    /* swap ESP into base if scale is 1 */
                    t = it, it = bt, bt = t;
                    x = ix, ix = bx, bx = x;
                }
                if (it == REG_NUM_ESP
                    || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
                    return NULL;        /* wrong, for various reasons */

                /* REX bits for the final index and base registers */
                output->rex |= rexflags(it, ix, REX_X);
                output->rex |= rexflags(bt, bx, REX_B);

                if (it == -1 && (bt & 7) != REG_NUM_ESP) {
                    /* no SIB needed */
                    int mod, rm;

                    if (bt == -1) {
                        rm = 5;
                        mod = 0;
                    } else {
                        rm = (bt & 7);
                        if (rm != REG_NUM_EBP && o == 0 &&
                                seg == NO_SEG && !forw_ref &&
                                !(input->eaflags &
                                  (EAF_BYTEOFFS | EAF_WORDOFFS)))
                            mod = 0;
                        else if (input->eaflags & EAF_BYTEOFFS ||
                                 (o >= -128 && o <= 127 && seg == NO_SEG
                                  && !forw_ref
                                  && !(input->eaflags & EAF_WORDOFFS)))
                            mod = 1;
                        else
                            mod = 2;
                    }

                    output->sib_present = false;
                    /*
                     * No base register or mod 2 means a 4-byte
                     * displacement; otherwise mod (0 or 1) is itself the
                     * displacement byte count.
                     */
                    output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
                    output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
                } else {
                    /* we need a SIB */
                    int mod, scale, index, base;

                    if (it == -1)
                        index = 4, s = 1;
                    else
                        index = (it & 7);

                    /* map the scale factor to the 2-bit SIB scale field */
                    switch (s) {
                    case 1:
                        scale = 0;
                        break;
                    case 2:
                        scale = 1;
                        break;
                    case 4:
                        scale = 2;
                        break;
                    case 8:
                        scale = 3;
                        break;
                    default:   /* then what the smeg is it? */
                        return NULL;    /* panic */
                    }

                    if (bt == -1) {
                        base = 5;
                        mod = 0;
                    } else {
                        base = (bt & 7);
                        if (base != REG_NUM_EBP && o == 0 &&
                                    seg == NO_SEG && !forw_ref &&
                                    !(input->eaflags &
                                      (EAF_BYTEOFFS | EAF_WORDOFFS)))
                            mod = 0;
                        else if (input->eaflags & EAF_BYTEOFFS ||
                                 (o >= -128 && o <= 127 && seg == NO_SEG
                                  && !forw_ref
                                  && !(input->eaflags & EAF_WORDOFFS)))
                            mod = 1;
                        else
                            mod = 2;
                    }

                    output->sib_present = true;
                    /* same displacement-size rule as the no-SIB case */
                    output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
                    output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
                    output->sib = (scale << 6) | (index << 3) | base;
                }
            } else {            /* it's 16-bit */
                int mod, rm;

                /* check for 64-bit long mode */
                if (addrbits == 64)
                    return NULL;

                /* check all registers are BX, BP, SI or DI */
                if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
                     && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
                                       && i != R_SI && i != R_DI))
                    return NULL;

                /* ensure the user didn't specify DWORD/QWORD */
                if (input->disp_size == 32 || input->disp_size == 64)
                    return NULL;

                if (s != 1 && i != -1)
                    return NULL;        /* no can do, in 16-bit EA */
                if (b == -1 && i != -1) {
                    int tmp = b;
                    b = i;
                    i = tmp;
                }               /* swap */
                if ((b == R_SI || b == R_DI) && i != -1) {
                    int tmp = b;
                    b = i;
                    i = tmp;
                }
                /* have BX/BP as base, SI/DI index */
                if (b == i)
                    return NULL;        /* shouldn't ever happen, in theory */
                if (i != -1 && b != -1 &&
                    (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
                    return NULL;        /* invalid combinations */
                if (b == -1)    /* pure offset: handled above */
                    return NULL;        /* so if it gets to here, panic! */

                /* pick the r/m value for the base(+index) combination */
                rm = -1;
                if (i != -1)
                    switch (i * 256 + b) {
                    case R_SI * 256 + R_BX:
                        rm = 0;
                        break;
                    case R_DI * 256 + R_BX:
                        rm = 1;
                        break;
                    case R_SI * 256 + R_BP:
                        rm = 2;
                        break;
                    case R_DI * 256 + R_BP:
                        rm = 3;
                        break;
                } else
                    switch (b) {
                    case R_SI:
                        rm = 4;
                        break;
                    case R_DI:
                        rm = 5;
                        break;
                    case R_BP:
                        rm = 6;
                        break;
                    case R_BX:
                        rm = 7;
                        break;
                    }
                if (rm == -1)   /* can't happen, in theory */
                    return NULL;        /* so panic if it does */

                /* rm 6 (BP alone) never uses mod 0 */
                if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
                    !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
                    mod = 0;
                else if (input->eaflags & EAF_BYTEOFFS ||
                         (o >= -128 && o <= 127 && seg == NO_SEG
                          && !forw_ref
                          && !(input->eaflags & EAF_WORDOFFS)))
                    mod = 1;
                else
                    mod = 2;

                output->sib_present = false;    /* no SIB - it's 16-bit */
                output->bytes = mod;    /* bytes of offset needed */
                output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
            }
        }
    }

    /* total length: ModRM byte + optional SIB byte + displacement bytes */
    output->size = 1 + output->sib_present + output->bytes;
    return output;
}
2534
2535 static void add_asp(insn *ins, int addrbits)
2536 {
2537     int j, valid;
2538     int defdisp;
2539
2540     valid = (addrbits == 64) ? 64|32 : 32|16;
2541
2542     switch (ins->prefixes[PPS_ASIZE]) {
2543     case P_A16:
2544         valid &= 16;
2545         break;
2546     case P_A32:
2547         valid &= 32;
2548         break;
2549     case P_A64:
2550         valid &= 64;
2551         break;
2552     case P_ASP:
2553         valid &= (addrbits == 32) ? 16 : 32;
2554         break;
2555     default:
2556         break;
2557     }
2558
2559     for (j = 0; j < ins->operands; j++) {
2560         if (is_class(MEMORY, ins->oprs[j].type)) {
2561             opflags_t i, b;
2562
2563             /* Verify as Register */
2564             if (ins->oprs[j].indexreg < EXPR_REG_START
2565                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2566                 i = 0;
2567             else
2568                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2569
2570             /* Verify as Register */
2571             if (ins->oprs[j].basereg < EXPR_REG_START
2572                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2573                 b = 0;
2574             else
2575                 b = nasm_reg_flags[ins->oprs[j].basereg];
2576
2577             if (ins->oprs[j].scale == 0)
2578                 i = 0;
2579
2580             if (!i && !b) {
2581                 int ds = ins->oprs[j].disp_size;
2582                 if ((addrbits != 64 && ds > 8) ||
2583                     (addrbits == 64 && ds == 16))
2584                     valid &= ds;
2585             } else {
2586                 if (!(REG16 & ~b))
2587                     valid &= 16;
2588                 if (!(REG32 & ~b))
2589                     valid &= 32;
2590                 if (!(REG64 & ~b))
2591                     valid &= 64;
2592
2593                 if (!(REG16 & ~i))
2594                     valid &= 16;
2595                 if (!(REG32 & ~i))
2596                     valid &= 32;
2597                 if (!(REG64 & ~i))
2598                     valid &= 64;
2599             }
2600         }
2601     }
2602
2603     if (valid & addrbits) {
2604         ins->addr_size = addrbits;
2605     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2606         /* Add an address size prefix */
2607         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2608         ins->prefixes[PPS_ASIZE] = pref;
2609         ins->addr_size = (addrbits == 32) ? 16 : 32;
2610     } else {
2611         /* Impossible... */
2612         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2613         ins->addr_size = addrbits; /* Error recovery */
2614     }
2615
2616     defdisp = ins->addr_size == 16 ? 16 : 32;
2617
2618     for (j = 0; j < ins->operands; j++) {
2619         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2620             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2621             != ins->addr_size) {
2622             /* mem_offs sizes must match the address size; if not,
2623                strip the MEM_OFFS bit and match only EA instructions */
2624             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2625         }
2626     }
2627 }