assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 0ww lpp
  96  *                 [w0] ww = 0 for W = 0
  97  *                 [w1] ww = 1 for W = 1
  98  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  99  *                 [ww] ww = 3 for W used as REX.W
 100  *
 101  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 102  *
 103  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 104  *                 which is to be extended to the operand size.
 105  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 106  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 107  * \312          - (disassembler only) invalid with non-default address size.
 108  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 109  * \314          - (disassembler only) invalid with REX.B
 110  * \315          - (disassembler only) invalid with REX.X
 111  * \316          - (disassembler only) invalid with REX.R
 112  * \317          - (disassembler only) invalid with REX.W
 113  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 114  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 115  * \322          - indicates that this instruction is only valid when the
 116  *                 operand size is the default (instruction to disassembler,
 117  *                 generates no code in the assembler)
 118  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 119  * \324          - indicates 64-bit operand size requiring REX prefix.
 120  * \325          - instruction which always uses spl/bpl/sil/dil
 121  * \330          - a literal byte follows in the code stream, to be added
 122  *                 to the condition code value of the instruction.
 123  * \331          - instruction not valid with REP prefix.  Hint for
 124  *                 disassembler only; for SSE instructions.
 125  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 126  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 127  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 128  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  129  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  130  * \337          - force a REPNE prefix (0xF2) even if not specified.
 131  *                 \336-\337 are still listed as prefixes in the disassembler.
 132  * \340          - reserve <operand 0> bytes of uninitialized storage.
 133  *                 Operand 0 had better be a segmentless constant.
 134  * \341          - this instruction needs a WAIT "prefix"
 135  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 136  *                 (POP is never used for CS) depending on operand 0
 137  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 138  *                 on operand 0
 139  * \360          - no SSE prefix (== \364\331)
 140  * \361          - 66 SSE prefix (== \366\331)
 141  * \362          - F2 SSE prefix (== \364\332)
 142  * \363          - F3 SSE prefix (== \364\333)
 143  * \364          - operand-size prefix (0x66) not permitted
 144  * \365          - address-size prefix (0x67) not permitted
 145  * \366          - operand-size prefix (0x66) used as opcode extension
 146  * \367          - address-size prefix (0x67) used as opcode extension
 147  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 148  *                 370 is used for Jcc, 371 is used for JMP.
 149  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 150  *                 used for conditional jump over longer jump
 151  */
 152
 153 #include "compiler.h"
 154
 155 #include <stdio.h>
 156 #include <string.h>
 157 #include <inttypes.h>
 158
 159 #include "nasm.h"
 160 #include "nasmlib.h"
 161 #include "assemble.h"
 162 #include "insns.h"
 163 #include "tables.h"
 164
 165 enum match_result {
 166     /*
 167      * Matching errors.  These should be sorted so that more specific
 168      * errors come later in the sequence.
 169      */
 170     MERR_INVALOP,
 171     MERR_OPSIZEMISSING,
 172     MERR_OPSIZEMISMATCH,
 173     MERR_BADCPU,
 174     MERR_BADMODE,
 175     /*
 176      * Matching success; the conditional ones first
 177      */
 178     MOK_JUMP,                   /* Matching OK but needs jmp_match() */
 179     MOK_GOOD                    /* Matching unconditionally OK */
 180 };
 181
 182 typedef struct {
 183     int sib_present;                 /* is a SIB byte necessary? */
 184     int bytes;                       /* # of bytes of offset needed */
 185     int size;                        /* lazy - this is sib+bytes+1 */
 186     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 187 } ea;
 188
 189 static uint32_t cpu;            /* cpu level received from nasm.c */
 190 static efunc errfunc;
 191 static struct ofmt *outfmt;
 192 static ListGen *list;
 193
 194 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 195 static void gencode(int32_t segment, int64_t offset, int bits,
 196                     insn * ins, const struct itemplate *temp,
 197                     int64_t insn_end);
 198 static enum match_result find_match(const struct itemplate **tempp,
 199                                     insn *instruction,
 200                                     int32_t segment, int64_t offset, int bits);
 201 static enum match_result matches(const struct itemplate *, insn *, int bits);
 202 static int32_t regflag(const operand *);
 203 static int32_t regval(const operand *);
 204 static int rexflags(int, int32_t, int);
 205 static int op_rexflags(const operand *, int);
 206 static ea *process_ea(operand *, ea *, int, int, int, int32_t);
 207 static void add_asp(insn *, int);
 208
 209 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 210 {
 211     return ins->prefixes[pos] == prefix;
 212 }
 213
 214 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 215 {
 216     if (ins->prefixes[pos])
 217         errfunc(ERR_NONFATAL, "invalid %s prefix",
 218                 prefix_name(ins->prefixes[pos]));
 219 }
 220
 221 static const char *size_name(int size)
 222 {
 223     switch (size) {
 224     case 1:
 225         return "byte";
 226     case 2:
 227         return "word";
 228     case 4:
 229         return "dword";
 230     case 8:
 231         return "qword";
 232     case 10:
 233         return "tword";
 234     case 16:
 235         return "oword";
 236     case 32:
 237         return "yword";
 238     default:
 239         return "???";
 240     }
 241 }
 242
 243 static void warn_overflow(int size, const struct operand *o)
 244 {
 245     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 246         int64_t lim = ((int64_t)1 << (size*8))-1;
 247         int64_t data = o->offset;
 248
 249         if (data < ~lim || data > lim)
 250             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 251                     "%s data exceeds bounds", size_name(size));
 252     }
 253 }
 254 /*
 255  * This routine wrappers the real output format's output routine,
 256  * in order to pass a copy of the data off to the listing file
 257  * generator at the same time.
 258  */
 259 static void out(int64_t offset, int32_t segto, const void *data,
 260                 enum out_type type, uint64_t size,
 261                 int32_t segment, int32_t wrt)
 262 {
 263     static int32_t lineno = 0;     /* static!!! */
 264     static char *lnfname = NULL;
 265     uint8_t p[8];
 266
 267     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 268         /*
 269          * This is a non-relocated address, and we're going to
 270          * convert it into RAWDATA format.
 271          */
 272         uint8_t *q = p;
 273
 274         if (size > 8) {
 275             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 276             return;
 277         }
 278
 279         WRITEADDR(q, *(int64_t *)data, size);
 280         data = p;
 281         type = OUT_RAWDATA;
 282     }
 283
 284     list->output(offset, data, type, size);
 285
 286     /*
 287      * this call to src_get determines when we call the
 288      * debug-format-specific "linenum" function
 289      * it updates lineno and lnfname to the current values
 290      * returning 0 if "same as last time", -2 if lnfname
 291      * changed, and the amount by which lineno changed,
 292      * if it did. thus, these variables must be static
 293      */
 294
 295     if (src_get(&lineno, &lnfname)) {
 296         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 297     }
 298
 299     outfmt->output(segto, data, type, size, segment, wrt);
 300 }
 301
 302 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 303                      insn * ins, const uint8_t *code)
 304 {
 305     int64_t isize;
 306     uint8_t c = code[0];
 307
 308     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 309         return false;
 310     if (!optimizing)
 311         return false;
 312     if (optimizing < 0 && c == 0371)
 313         return false;
 314
 315     isize = calcsize(segment, offset, bits, ins, code);
 316
 317     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 318         /* Be optimistic in pass 1 */
 319         return true;
 320
 321     if (ins->oprs[0].segment != segment)
 322         return false;
 323
 324     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 325     return (isize >= -128 && isize <= 127); /* is it byte size? */
 326 }
 327
 328 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 329                  insn * instruction, struct ofmt *output, efunc error,
 330                  ListGen * listgen)
 331 {
 332     const struct itemplate *temp;
 333     int j;
 334     enum match_result m;
 335     int64_t insn_end;
 336     int32_t itimes;
 337     int64_t start = offset;
 338     int64_t wsize = 0;             /* size for DB etc. */
 339
 340     errfunc = error;            /* to pass to other functions */
 341     cpu = cp;
 342     outfmt = output;            /* likewise */
 343     list = listgen;             /* and again */
 344
 345     switch (instruction->opcode) {
 346     case -1:
 347         return 0;
 348     case I_DB:
 349         wsize = 1;
 350         break;
 351     case I_DW:
 352         wsize = 2;
 353         break;
 354     case I_DD:
 355         wsize = 4;
 356         break;
 357     case I_DQ:
 358         wsize = 8;
 359         break;
 360     case I_DT:
 361         wsize = 10;
 362         break;
 363     case I_DO:
 364         wsize = 16;
 365         break;
 366     case I_DY:
 367         wsize = 32;
 368         break;
 369     default:
 370         break;
 371     }
 372
 373     if (wsize) {
 374         extop *e;
 375         int32_t t = instruction->times;
 376         if (t < 0)
 377             errfunc(ERR_PANIC,
 378                     "instruction->times < 0 (%ld) in assemble()", t);
 379
 380         while (t--) {           /* repeat TIMES times */
 381             list_for_each(e, instruction->eops) {
 382                 if (e->type == EOT_DB_NUMBER) {
 383                     if (wsize == 1) {
 384                         if (e->segment != NO_SEG)
 385                             errfunc(ERR_NONFATAL,
 386                                     "one-byte relocation attempted");
 387                         else {
 388                             uint8_t out_byte = e->offset;
 389                             out(offset, segment, &out_byte,
 390                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 391                         }
 392                     } else if (wsize > 8) {
 393                         errfunc(ERR_NONFATAL,
 394                                 "integer supplied to a DT, DO or DY"
 395                                 " instruction");
 396                     } else
 397                         out(offset, segment, &e->offset,
 398                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 399                     offset += wsize;
 400                 } else if (e->type == EOT_DB_STRING ||
 401                            e->type == EOT_DB_STRING_FREE) {
 402                     int align;
 403
 404                     out(offset, segment, e->stringval,
 405                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 406                     align = e->stringlen % wsize;
 407
 408                     if (align) {
 409                         align = wsize - align;
 410                         out(offset, segment, zero_buffer,
 411                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 412                     }
 413                     offset += e->stringlen + align;
 414                 }
 415             }
 416             if (t > 0 && t == instruction->times - 1) {
 417                 /*
 418                  * Dummy call to list->output to give the offset to the
 419                  * listing module.
 420                  */
 421                 list->output(offset, NULL, OUT_RAWDATA, 0);
 422                 list->uplevel(LIST_TIMES);
 423             }
 424         }
 425         if (instruction->times > 1)
 426             list->downlevel(LIST_TIMES);
 427         return offset - start;
 428     }
 429
 430     if (instruction->opcode == I_INCBIN) {
 431         const char *fname = instruction->eops->stringval;
 432         FILE *fp;
 433
 434         fp = fopen(fname, "rb");
 435         if (!fp) {
 436             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 437                   fname);
 438         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 439             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 440                   fname);
 441         } else {
 442             static char buf[4096];
 443             size_t t = instruction->times;
 444             size_t base = 0;
 445             size_t len;
 446
 447             len = ftell(fp);
 448             if (instruction->eops->next) {
 449                 base = instruction->eops->next->offset;
 450                 len -= base;
 451                 if (instruction->eops->next->next &&
 452                     len > (size_t)instruction->eops->next->next->offset)
 453                     len = (size_t)instruction->eops->next->next->offset;
 454             }
 455             /*
 456              * Dummy call to list->output to give the offset to the
 457              * listing module.
 458              */
 459             list->output(offset, NULL, OUT_RAWDATA, 0);
 460             list->uplevel(LIST_INCBIN);
 461             while (t--) {
 462                 size_t l;
 463
 464                 fseek(fp, base, SEEK_SET);
 465                 l = len;
 466                 while (l > 0) {
 467                     int32_t m;
 468                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 469                     if (!m) {
 470                         /*
 471                          * This shouldn't happen unless the file
 472                          * actually changes while we are reading
 473                          * it.
 474                          */
 475                         error(ERR_NONFATAL,
 476                               "`incbin': unexpected EOF while"
 477                               " reading file `%s'", fname);
 478                         t = 0;  /* Try to exit cleanly */
 479                         break;
 480                     }
 481                     out(offset, segment, buf, OUT_RAWDATA, m,
 482                         NO_SEG, NO_SEG);
 483                     l -= m;
 484                 }
 485             }
 486             list->downlevel(LIST_INCBIN);
 487             if (instruction->times > 1) {
 488                 /*
 489                  * Dummy call to list->output to give the offset to the
 490                  * listing module.
 491                  */
 492                 list->output(offset, NULL, OUT_RAWDATA, 0);
 493                 list->uplevel(LIST_TIMES);
 494                 list->downlevel(LIST_TIMES);
 495             }
 496             fclose(fp);
 497             return instruction->times * len;
 498         }
 499         return 0;               /* if we're here, there's an error */
 500     }
 501
 502     /* Check to see if we need an address-size prefix */
 503     add_asp(instruction, bits);
 504
 505     m = find_match(&temp, instruction, segment, offset, bits);
 506
 507     if (m == MOK_GOOD) {
 508         /* Matches! */
 509         int64_t insn_size = calcsize(segment, offset, bits,
 510                                      instruction, temp->code);
 511         itimes = instruction->times;
 512         if (insn_size < 0)  /* shouldn't be, on pass two */
 513             error(ERR_PANIC, "errors made it through from pass one");
 514         else
 515             while (itimes--) {
 516                 for (j = 0; j < MAXPREFIX; j++) {
 517                     uint8_t c = 0;
 518                     switch (instruction->prefixes[j]) {
 519                     case P_WAIT:
 520                         c = 0x9B;
 521                         break;
 522                     case P_LOCK:
 523                         c = 0xF0;
 524                         break;
 525                     case P_REPNE:
 526                     case P_REPNZ:
 527                         c = 0xF2;
 528                         break;
 529                     case P_REPE:
 530                     case P_REPZ:
 531                     case P_REP:
 532                         c = 0xF3;
 533                         break;
 534                     case R_CS:
 535                         if (bits == 64) {
 536                             error(ERR_WARNING | ERR_PASS2,
 537                                   "cs segment base generated, but will be ignored in 64-bit mode");
 538                         }
 539                         c = 0x2E;
 540                         break;
 541                     case R_DS:
 542                         if (bits == 64) {
 543                             error(ERR_WARNING | ERR_PASS2,
 544                                   "ds segment base generated, but will be ignored in 64-bit mode");
 545                         }
 546                         c = 0x3E;
 547                         break;
 548                     case R_ES:
 549                         if (bits == 64) {
 550                             error(ERR_WARNING | ERR_PASS2,
 551                                   "es segment base generated, but will be ignored in 64-bit mode");
 552                         }
 553                         c = 0x26;
 554                         break;
 555                     case R_FS:
 556                         c = 0x64;
 557                         break;
 558                     case R_GS:
 559                         c = 0x65;
 560                         break;
 561                     case R_SS:
 562                         if (bits == 64) {
 563                             error(ERR_WARNING | ERR_PASS2,
 564                                   "ss segment base generated, but will be ignored in 64-bit mode");
 565                         }
 566                         c = 0x36;
 567                         break;
 568                     case R_SEGR6:
 569                     case R_SEGR7:
 570                         error(ERR_NONFATAL,
 571                               "segr6 and segr7 cannot be used as prefixes");
 572                         break;
 573                     case P_A16:
 574                         if (bits == 64) {
 575                             error(ERR_NONFATAL,
 576                                   "16-bit addressing is not supported "
 577                                   "in 64-bit mode");
 578                         } else if (bits != 16)
 579                             c = 0x67;
 580                         break;
 581                     case P_A32:
 582                         if (bits != 32)
 583                             c = 0x67;
 584                         break;
 585                     case P_A64:
 586                         if (bits != 64) {
 587                             error(ERR_NONFATAL,
 588                                   "64-bit addressing is only supported "
 589                                   "in 64-bit mode");
 590                         }
 591                         break;
 592                     case P_ASP:
 593                         c = 0x67;
 594                         break;
 595                     case P_O16:
 596                         if (bits != 16)
 597                             c = 0x66;
 598                         break;
 599                     case P_O32:
 600                         if (bits == 16)
 601                             c = 0x66;
 602                         break;
 603                     case P_O64:
 604                         /* REX.W */
 605                         break;
 606                     case P_OSP:
 607                         c = 0x66;
 608                         break;
 609                     case P_none:
 610                         break;
 611                     default:
 612                         error(ERR_PANIC, "invalid instruction prefix");
 613                     }
 614                     if (c != 0) {
 615                         out(offset, segment, &c, OUT_RAWDATA, 1,
 616                             NO_SEG, NO_SEG);
 617                         offset++;
 618                     }
 619                 }
 620                 insn_end = offset + insn_size;
 621                 gencode(segment, offset, bits, instruction,
 622                         temp, insn_end);
 623                 offset += insn_size;
 624                 if (itimes > 0 && itimes == instruction->times - 1) {
 625                     /*
 626                      * Dummy call to list->output to give the offset to the
 627                      * listing module.
 628                      */
 629                     list->output(offset, NULL, OUT_RAWDATA, 0);
 630                     list->uplevel(LIST_TIMES);
 631                 }
 632             }
 633         if (instruction->times > 1)
 634             list->downlevel(LIST_TIMES);
 635         return offset - start;
 636     } else {
 637         /* No match */
 638         switch (m) {
 639         case MERR_OPSIZEMISSING:
 640             error(ERR_NONFATAL, "operation size not specified");
 641             break;
 642         case MERR_OPSIZEMISMATCH:
 643             error(ERR_NONFATAL, "mismatch in operand sizes");
 644             break;
 645         case MERR_BADCPU:
 646             error(ERR_NONFATAL, "no instruction for this cpu level");
 647             break;
 648         case MERR_BADMODE:
 649             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 650                   bits);
 651             break;
 652         default:
 653             error(ERR_NONFATAL,
 654                   "invalid combination of opcode and operands");
 655             break;
 656         }
 657     }
 658     return 0;
 659 }
 660
 661 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 662                   insn * instruction, efunc error)
 663 {
 664     const struct itemplate *temp;
 665     enum match_result m;
 666
 667     errfunc = error;            /* to pass to other functions */
 668     cpu = cp;
 669
 670     if (instruction->opcode == -1)
 671         return 0;
 672
 673     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 674         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 675         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 676         instruction->opcode == I_DY) {
 677         extop *e;
 678         int32_t isize, osize, wsize = 0;   /* placate gcc */
 679
 680         isize = 0;
 681         switch (instruction->opcode) {
 682         case I_DB:
 683             wsize = 1;
 684             break;
 685         case I_DW:
 686             wsize = 2;
 687             break;
 688         case I_DD:
 689             wsize = 4;
 690             break;
 691         case I_DQ:
 692             wsize = 8;
 693             break;
 694         case I_DT:
 695             wsize = 10;
 696             break;
 697         case I_DO:
 698             wsize = 16;
 699             break;
 700         case I_DY:
 701             wsize = 32;
 702             break;
 703         default:
 704             break;
 705         }
 706
 707         list_for_each(e, instruction->eops) {
 708             int32_t align;
 709
 710             osize = 0;
 711             if (e->type == EOT_DB_NUMBER)
 712                 osize = 1;
 713             else if (e->type == EOT_DB_STRING ||
 714                      e->type == EOT_DB_STRING_FREE)
 715                 osize = e->stringlen;
 716
 717             align = (-osize) % wsize;
 718             if (align < 0)
 719                 align += wsize;
 720             isize += osize + align;
 721         }
 722         return isize * instruction->times;
 723     }
 724
 725     if (instruction->opcode == I_INCBIN) {
 726         const char *fname = instruction->eops->stringval;
 727         FILE *fp;
 728         size_t len;
 729
 730         fp = fopen(fname, "rb");
 731         if (!fp)
 732             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 733                   fname);
 734         else if (fseek(fp, 0L, SEEK_END) < 0)
 735             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 736                   fname);
 737         else {
 738             len = ftell(fp);
 739             fclose(fp);
 740             if (instruction->eops->next) {
 741                 len -= instruction->eops->next->offset;
 742                 if (instruction->eops->next->next &&
 743                     len > (size_t)instruction->eops->next->next->offset) {
 744                     len = (size_t)instruction->eops->next->next->offset;
 745                 }
 746             }
 747             return instruction->times * len;
 748         }
 749         return 0;               /* if we're here, there's an error */
 750     }
 751
 752     /* Check to see if we need an address-size prefix */
 753     add_asp(instruction, bits);
 754
 755     m = find_match(&temp, instruction, segment, offset, bits);
 756     if (m == MOK_GOOD) {
 757         /* we've matched an instruction. */
 758         int64_t isize;
 759         const uint8_t *codes = temp->code;
 760         int j;
 761
 762         isize = calcsize(segment, offset, bits, instruction, codes);
 763         if (isize < 0)
 764             return -1;
 765         for (j = 0; j < MAXPREFIX; j++) {
 766             switch (instruction->prefixes[j]) {
 767             case P_A16:
 768                 if (bits != 16)
 769                     isize++;
 770                 break;
 771             case P_A32:
 772                 if (bits != 32)
 773                     isize++;
 774                 break;
 775             case P_O16:
 776                 if (bits != 16)
 777                     isize++;
 778                 break;
 779             case P_O32:
 780                 if (bits == 16)
 781                     isize++;
 782                 break;
 783             case P_A64:
 784             case P_O64:
 785             case P_none:
 786                 break;
 787             default:
 788                 isize++;
 789                 break;
 790             }
 791         }
 792         return isize * instruction->times;
 793     } else {
 794         return -1;                  /* didn't match any instruction */
 795     }
 796 }
 797
 798 static bool possible_sbyte(operand *o)
 799 {
 800     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 801         !(o->opflags & OPFLAG_UNKNOWN) &&
 802         optimizing >= 0 && !(o->type & STRICT);
 803 }
 804
 805 /* check that opn[op]  is a signed byte of size 16 or 32 */
 806 static bool is_sbyte16(operand *o)
 807 {
 808     int16_t v;
 809
 810     if (!possible_sbyte(o))
 811         return false;
 812
 813     v = o->offset;
 814     return v >= -128 && v <= 127;
 815 }
 816
 817 static bool is_sbyte32(operand *o)
 818 {
 819     int32_t v;
 820
 821     if (!possible_sbyte(o))
 822         return false;
 823
 824     v = o->offset;
 825     return v >= -128 && v <= 127;
 826 }
 827
 828 /* Common construct */
 829 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 830
 831 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 832                         insn * ins, const uint8_t *codes)
 833 {
 834     int64_t length = 0;
 835     uint8_t c;
 836     int rex_mask = ~0;
 837     int op1, op2;
 838     struct operand *opx;
 839     uint8_t opex = 0;
 840
 841     ins->rex = 0;               /* Ensure REX is reset */
 842
 843     if (ins->prefixes[PPS_OSIZE] == P_O64)
 844         ins->rex |= REX_W;
 845
 846     (void)segment;              /* Don't warn that this parameter is unused */
 847     (void)offset;               /* Don't warn that this parameter is unused */
 848
 849     while (*codes) {
 850         c = *codes++;
 851         op1 = (c & 3) + ((opex & 1) << 2);
 852         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 853         opx = &ins->oprs[op1];
 854         opex = 0;               /* For the next iteration */
 855
 856         switch (c) {
 857         case 01:
 858         case 02:
 859         case 03:
 860         case 04:
 861             codes += c, length += c;
 862             break;
 863
 864         case 05:
 865         case 06:
 866         case 07:
 867             opex = c;
 868             break;
 869
 870         case4(010):
 871             ins->rex |=
 872                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 873             codes++, length++;
 874             break;
 875
 876         case4(014):
 877         case4(020):
 878         case4(024):
 879             length++;
 880             break;
 881
 882         case4(030):
 883             length += 2;
 884             break;
 885
 886         case4(034):
 887             if (opx->type & (BITS16 | BITS32 | BITS64))
 888                 length += (opx->type & BITS16) ? 2 : 4;
 889             else
 890                 length += (bits == 16) ? 2 : 4;
 891             break;
 892
 893         case4(040):
 894             length += 4;
 895             break;
 896
 897         case4(044):
 898             length += ins->addr_size >> 3;
 899             break;
 900
 901         case4(050):
 902             length++;
 903             break;
 904
 905         case4(054):
 906             length += 8; /* MOV reg64/imm */
 907             break;
 908
 909         case4(060):
 910             length += 2;
 911             break;
 912
 913         case4(064):
 914             if (opx->type & (BITS16 | BITS32 | BITS64))
 915                 length += (opx->type & BITS16) ? 2 : 4;
 916             else
 917                 length += (bits == 16) ? 2 : 4;
 918             break;
 919
 920         case4(070):
 921             length += 4;
 922             break;
 923
 924         case4(074):
 925             length += 2;
 926             break;
 927
 928         case4(0140):
 929             length += is_sbyte16(opx) ? 1 : 2;
 930             break;
 931
 932         case4(0144):
 933             codes++;
 934             length++;
 935             break;
 936
 937         case4(0150):
 938             length += is_sbyte32(opx) ? 1 : 4;
 939             break;
 940
 941         case4(0154):
 942             codes++;
 943             length++;
 944             break;
 945
 946         case4(0160):
 947             length++;
 948             ins->rex |= REX_D;
 949             ins->drexdst = regval(opx);
 950             break;
 951
 952         case4(0164):
 953             length++;
 954             ins->rex |= REX_D|REX_OC;
 955             ins->drexdst = regval(opx);
 956             break;
 957
 958         case 0171:
 959             break;
 960
 961         case 0172:
 962         case 0173:
 963         case 0174:
 964             codes++;
 965             length++;
 966             break;
 967
 968         case4(0250):
 969             length += is_sbyte32(opx) ? 1 : 4;
 970             break;
 971
 972         case4(0254):
 973             length += 4;
 974             break;
 975
 976         case4(0260):
 977             ins->rex |= REX_V;
 978             ins->drexdst = regval(opx);
 979             ins->vex_cm = *codes++;
 980             ins->vex_wlp = *codes++;
 981             break;
 982
 983         case 0270:
 984             ins->rex |= REX_V;
 985             ins->drexdst = 0;
 986             ins->vex_cm = *codes++;
 987             ins->vex_wlp = *codes++;
 988             break;
 989
 990         case4(0274):
 991             length++;
 992             break;
 993
 994         case4(0300):
 995             break;
 996
 997         case 0310:
 998             if (bits == 64)
 999                 return -1;
1000             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
1001             break;
1002
1003         case 0311:
1004             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
1005             break;
1006
1007         case 0312:
1008             break;
1009
1010         case 0313:
1011             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
1012                 has_prefix(ins, PPS_ASIZE, P_A32))
1013                 return -1;
1014             break;
1015
1016         case4(0314):
1017             break;
1018
1019         case 0320:
1020             length += (bits != 16);
1021             break;
1022
1023         case 0321:
1024             length += (bits == 16);
1025             break;
1026
1027         case 0322:
1028             break;
1029
1030         case 0323:
1031             rex_mask &= ~REX_W;
1032             break;
1033
1034         case 0324:
1035             ins->rex |= REX_W;
1036             break;
1037
1038         case 0325:
1039             ins->rex |= REX_NH;
1040             break;
1041
1042         case 0330:
1043             codes++, length++;
1044             break;
1045
1046         case 0331:
1047             break;
1048
1049         case 0332:
1050         case 0333:
1051             length++;
1052             break;
1053
1054         case 0334:
1055             ins->rex |= REX_L;
1056             break;
1057
1058         case 0335:
1059             break;
1060
1061         case 0336:
1062             if (!ins->prefixes[PPS_LREP])
1063                 ins->prefixes[PPS_LREP] = P_REP;
1064             break;
1065
1066         case 0337:
1067             if (!ins->prefixes[PPS_LREP])
1068                 ins->prefixes[PPS_LREP] = P_REPNE;
1069             break;
1070
1071         case 0340:
1072             if (ins->oprs[0].segment != NO_SEG)
1073                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1074                         " quantity of BSS space");
1075             else
1076                 length += ins->oprs[0].offset;
1077             break;
1078
1079         case 0341:
1080             if (!ins->prefixes[PPS_WAIT])
1081                 ins->prefixes[PPS_WAIT] = P_WAIT;
1082             break;
1083
1084         case4(0344):
1085             length++;
1086             break;
1087
1088         case 0360:
1089             break;
1090
1091         case 0361:
1092         case 0362:
1093         case 0363:
1094             length++;
1095             break;
1096
1097         case 0364:
1098         case 0365:
1099             break;
1100
1101         case 0366:
1102         case 0367:
1103             length++;
1104             break;
1105
1106         case 0370:
1107         case 0371:
1108         case 0372:
1109             break;
1110
1111         case 0373:
1112             length++;
1113             break;
1114
1115         case4(0100):
1116         case4(0110):
1117         case4(0120):
1118         case4(0130):
1119         case4(0200):
1120         case4(0204):
1121         case4(0210):
1122         case4(0214):
1123         case4(0220):
1124         case4(0224):
1125         case4(0230):
1126         case4(0234):
1127             {
1128                 ea ea_data;
1129                 int rfield;
1130                 int32_t rflags;
1131                 struct operand *opy = &ins->oprs[op2];
1132
1133                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1134
1135                 if (c <= 0177) {
1136                     /* pick rfield from operand b (opx) */
1137                     rflags = regflag(opx);
1138                     rfield = nasm_regvals[opx->basereg];
1139                 } else {
1140                     rflags = 0;
1141                     rfield = c & 7;
1142                 }
1143                 if (!process_ea(opy, &ea_data, bits,
1144                                 ins->addr_size, rfield, rflags)) {
1145                     errfunc(ERR_NONFATAL, "invalid effective address");
1146                     return -1;
1147                 } else {
1148                     ins->rex |= ea_data.rex;
1149                     length += ea_data.size;
1150                 }
1151             }
1152             break;
1153
1154         default:
1155             errfunc(ERR_PANIC, "internal instruction table corrupt"
1156                     ": instruction code \\%o (0x%02X) given", c, c);
1157             break;
1158         }
1159     }
1160
1161     ins->rex &= rex_mask;
1162
1163     if (ins->rex & REX_NH) {
1164         if (ins->rex & REX_H) {
1165             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1166             return -1;
1167         }
1168         ins->rex &= ~REX_P;     /* Don't force REX prefix due to high reg */
1169     }
1170
1171     if (ins->rex & REX_V) {
1172         int bad32 = REX_R|REX_W|REX_X|REX_B;
1173
1174         if (ins->rex & REX_H) {
1175             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1176             return -1;
1177         }
1178         switch (ins->vex_wlp & 030) {
1179         case 000:
1180         case 020:
1181             ins->rex &= ~REX_W;
1182             break;
1183         case 010:
1184             ins->rex |= REX_W;
1185             bad32 &= ~REX_W;
1186             break;
1187         case 030:
1188             /* Follow REX_W */
1189             break;
1190         }
1191
1192         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1193             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1194             return -1;
1195         }
1196         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1197             length += 3;
1198         else
1199             length += 2;
1200     } else if (ins->rex & REX_D) {
1201         if (ins->rex & REX_H) {
1202             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1203             return -1;
1204         }
1205         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1206                            ins->drexdst > 7)) {
1207             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1208             return -1;
1209         }
1210         length++;
1211     } else if (ins->rex & REX_REAL) {
1212         if (ins->rex & REX_H) {
1213             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1214             return -1;
1215         } else if (bits == 64) {
1216             length++;
1217         } else if ((ins->rex & REX_L) &&
1218                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1219                    cpu >= IF_X86_64) {
1220             /* LOCK-as-REX.R */
1221             assert_no_prefix(ins, PPS_LREP);
1222             length++;
1223         } else {
1224             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1225             return -1;
1226         }
1227     }
1228
1229     return length;
1230 }
1231
/*
 * EMIT_REX(): emit a pending REX prefix byte, if one is needed.
 *
 * Expects `ins`, `bits`, `offset` and `segment` to be in scope at the
 * point of use.  A byte is written only when assembling for 64-bit
 * mode, at least one REX_REAL bit is set in ins->rex, and the
 * instruction is not flagged REX_D or REX_V.  The byte written is the
 * REX_REAL bits of ins->rex with REX_P or'ed in.  Afterwards ins->rex
 * is cleared, so a later EMIT_REX() for the same instruction does
 * nothing, and `offset` is advanced by one.
 *
 * The prefix is staged in a uint8_t so that exactly that byte is
 * handed to out(), independent of the width and byte order of
 * ins->rex.  The do { } while (0) wrapper makes the expansion a single
 * statement, so "EMIT_REX();" is safe inside an unbraced if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&     \
            (bits == 64)) {                                             \
            uint8_t rex_byte_ =                                         \
                (uint8_t)((ins->rex & REX_REAL) | REX_P);               \
            out(offset, segment, &rex_byte_, OUT_RAWDATA, 1,            \
                NO_SEG, NO_SEG);                                        \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1239
1240 static void gencode(int32_t segment, int64_t offset, int bits,
1241                     insn * ins, const struct itemplate *temp,
1242                     int64_t insn_end)
1243 {
1244     static char condval[] = {   /* conditional opcodes */
1245         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1246         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1247         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1248     };
1249     uint8_t c;
1250     uint8_t bytes[4];
1251     int64_t size;
1252     int64_t data;
1253     int op1, op2;
1254     struct operand *opx;
1255     const uint8_t *codes = temp->code;
1256     uint8_t opex = 0;
1257
1258     while (*codes) {
1259         c = *codes++;
1260         op1 = (c & 3) + ((opex & 1) << 2);
1261         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1262         opx = &ins->oprs[op1];
1263         opex = 0;               /* For the next iteration */
1264
1265         switch (c) {
1266         case 01:
1267         case 02:
1268         case 03:
1269         case 04:
1270             EMIT_REX();
1271             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1272             codes += c;
1273             offset += c;
1274             break;
1275
1276         case 05:
1277         case 06:
1278         case 07:
1279             opex = c;
1280             break;
1281
1282         case4(010):
1283             EMIT_REX();
1284             bytes[0] = *codes++ + (regval(opx) & 7);
1285             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1286             offset += 1;
1287             break;
1288
1289         case4(014):
1290             /* The test for BITS8 and SBYTE here is intended to avoid
1291                warning on optimizer actions due to SBYTE, while still
1292                warn on explicit BYTE directives.  Also warn, obviously,
1293                if the optimizer isn't enabled. */
1294             if (((opx->type & BITS8) ||
1295                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1296                 (opx->offset < -128 || opx->offset > 127)) {
1297                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1298                         "signed byte value exceeds bounds");
1299             }
1300             if (opx->segment != NO_SEG) {
1301                 data = opx->offset;
1302                 out(offset, segment, &data, OUT_ADDRESS, 1,
1303                     opx->segment, opx->wrt);
1304             } else {
1305                 bytes[0] = opx->offset;
1306                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1307                     NO_SEG);
1308             }
1309             offset += 1;
1310             break;
1311
1312         case4(020):
1313             if (opx->offset < -256 || opx->offset > 255) {
1314                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1315                         "byte value exceeds bounds");
1316             }
1317             if (opx->segment != NO_SEG) {
1318                 data = opx->offset;
1319                 out(offset, segment, &data, OUT_ADDRESS, 1,
1320                     opx->segment, opx->wrt);
1321             } else {
1322                 bytes[0] = opx->offset;
1323                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1324                     NO_SEG);
1325             }
1326             offset += 1;
1327             break;
1328
1329         case4(024):
1330             if (opx->offset < 0 || opx->offset > 255)
1331                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1332                         "unsigned byte value exceeds bounds");
1333             if (opx->segment != NO_SEG) {
1334                 data = opx->offset;
1335                 out(offset, segment, &data, OUT_ADDRESS, 1,
1336                     opx->segment, opx->wrt);
1337             } else {
1338                 bytes[0] = opx->offset;
1339                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1340                     NO_SEG);
1341             }
1342             offset += 1;
1343             break;
1344
1345         case4(030):
1346             warn_overflow(2, opx);
1347             data = opx->offset;
1348             out(offset, segment, &data, OUT_ADDRESS, 2,
1349                 opx->segment, opx->wrt);
1350             offset += 2;
1351             break;
1352
1353         case4(034):
1354             if (opx->type & (BITS16 | BITS32))
1355                 size = (opx->type & BITS16) ? 2 : 4;
1356             else
1357                 size = (bits == 16) ? 2 : 4;
1358             warn_overflow(size, opx);
1359             data = opx->offset;
1360             out(offset, segment, &data, OUT_ADDRESS, size,
1361                 opx->segment, opx->wrt);
1362             offset += size;
1363             break;
1364
1365         case4(040):
1366             warn_overflow(4, opx);
1367             data = opx->offset;
1368             out(offset, segment, &data, OUT_ADDRESS, 4,
1369                 opx->segment, opx->wrt);
1370             offset += 4;
1371             break;
1372
1373         case4(044):
1374             data = opx->offset;
1375             size = ins->addr_size >> 3;
1376             warn_overflow(size, opx);
1377             out(offset, segment, &data, OUT_ADDRESS, size,
1378                 opx->segment, opx->wrt);
1379             offset += size;
1380             break;
1381
1382         case4(050):
1383             if (opx->segment != segment)
1384                 errfunc(ERR_NONFATAL,
1385                         "short relative jump outside segment");
1386             data = opx->offset - insn_end;
1387             if (data > 127 || data < -128)
1388                 errfunc(ERR_NONFATAL, "short jump is out of range");
1389             bytes[0] = data;
1390             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1391             offset += 1;
1392             break;
1393
1394         case4(054):
1395             data = (int64_t)opx->offset;
1396             out(offset, segment, &data, OUT_ADDRESS, 8,
1397                 opx->segment, opx->wrt);
1398             offset += 8;
1399             break;
1400
1401         case4(060):
1402             if (opx->segment != segment) {
1403                 data = opx->offset;
1404                 out(offset, segment, &data,
1405                     OUT_REL2ADR, insn_end - offset,
1406                     opx->segment, opx->wrt);
1407             } else {
1408                 data = opx->offset - insn_end;
1409                 out(offset, segment, &data,
1410                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1411             }
1412             offset += 2;
1413             break;
1414
1415         case4(064):
1416             if (opx->type & (BITS16 | BITS32 | BITS64))
1417                 size = (opx->type & BITS16) ? 2 : 4;
1418             else
1419                 size = (bits == 16) ? 2 : 4;
1420             if (opx->segment != segment) {
1421                 data = opx->offset;
1422                 out(offset, segment, &data,
1423                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1424                     insn_end - offset, opx->segment, opx->wrt);
1425             } else {
1426                 data = opx->offset - insn_end;
1427                 out(offset, segment, &data,
1428                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1429             }
1430             offset += size;
1431             break;
1432
1433         case4(070):
1434             if (opx->segment != segment) {
1435                 data = opx->offset;
1436                 out(offset, segment, &data,
1437                     OUT_REL4ADR, insn_end - offset,
1438                     opx->segment, opx->wrt);
1439             } else {
1440                 data = opx->offset - insn_end;
1441                 out(offset, segment, &data,
1442                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1443             }
1444             offset += 4;
1445             break;
1446
1447         case4(074):
1448             if (opx->segment == NO_SEG)
1449                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1450                         " relocatable");
1451             data = 0;
1452             out(offset, segment, &data, OUT_ADDRESS, 2,
1453                 outfmt->segbase(1 + opx->segment),
1454                 opx->wrt);
1455             offset += 2;
1456             break;
1457
1458         case4(0140):
1459             data = opx->offset;
1460             warn_overflow(2, opx);
1461             if (is_sbyte16(opx)) {
1462                 bytes[0] = data;
1463                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1464                     NO_SEG);
1465                 offset++;
1466             } else {
1467                 out(offset, segment, &data, OUT_ADDRESS, 2,
1468                     opx->segment, opx->wrt);
1469                 offset += 2;
1470             }
1471             break;
1472
1473         case4(0144):
1474             EMIT_REX();
1475             bytes[0] = *codes++;
1476             if (is_sbyte16(opx))
1477                 bytes[0] |= 2;  /* s-bit */
1478             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1479             offset++;
1480             break;
1481
1482         case4(0150):
1483             data = opx->offset;
1484             warn_overflow(4, opx);
1485             if (is_sbyte32(opx)) {
1486                 bytes[0] = data;
1487                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1488                     NO_SEG);
1489                 offset++;
1490             } else {
1491                 out(offset, segment, &data, OUT_ADDRESS, 4,
1492                     opx->segment, opx->wrt);
1493                 offset += 4;
1494             }
1495             break;
1496
1497         case4(0154):
1498             EMIT_REX();
1499             bytes[0] = *codes++;
1500             if (is_sbyte32(opx))
1501                 bytes[0] |= 2;  /* s-bit */
1502             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1503             offset++;
1504             break;
1505
1506         case4(0160):
1507         case4(0164):
1508             break;
1509
1510         case 0171:
1511             bytes[0] =
1512                 (ins->drexdst << 4) |
1513                 (ins->rex & REX_OC ? 0x08 : 0) |
1514                 (ins->rex & (REX_R|REX_X|REX_B));
1515             ins->rex = 0;
1516             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1517             offset++;
1518             break;
1519
1520         case 0172:
1521             c = *codes++;
1522             opx = &ins->oprs[c >> 3];
1523             bytes[0] = nasm_regvals[opx->basereg] << 4;
1524             opx = &ins->oprs[c & 7];
1525             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1526                 errfunc(ERR_NONFATAL,
1527                         "non-absolute expression not permitted as argument %d",
1528                         c & 7);
1529             } else {
1530                 if (opx->offset & ~15) {
1531                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1532                             "four-bit argument exceeds bounds");
1533                 }
1534                 bytes[0] |= opx->offset & 15;
1535             }
1536             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1537             offset++;
1538             break;
1539
1540         case 0173:
1541             c = *codes++;
1542             opx = &ins->oprs[c >> 4];
1543             bytes[0] = nasm_regvals[opx->basereg] << 4;
1544             bytes[0] |= c & 15;
1545             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1546             offset++;
1547             break;
1548
1549         case 0174:
1550             c = *codes++;
1551             opx = &ins->oprs[c];
1552             bytes[0] = nasm_regvals[opx->basereg] << 4;
1553             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1554             offset++;
1555             break;
1556
1557         case4(0250):
1558             data = opx->offset;
1559             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1560                 (int32_t)data != (int64_t)data) {
1561                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1562                         "signed dword immediate exceeds bounds");
1563             }
1564             if (is_sbyte32(opx)) {
1565                 bytes[0] = data;
1566                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1567                     NO_SEG);
1568                 offset++;
1569             } else {
1570                 out(offset, segment, &data, OUT_ADDRESS, 4,
1571                     opx->segment, opx->wrt);
1572                 offset += 4;
1573             }
1574             break;
1575
1576         case4(0254):
1577             data = opx->offset;
1578             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1579                 (int32_t)data != (int64_t)data) {
1580                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1581                         "signed dword immediate exceeds bounds");
1582             }
1583             out(offset, segment, &data, OUT_ADDRESS, 4,
1584                 opx->segment, opx->wrt);
1585             offset += 4;
1586             break;
1587
1588         case4(0260):
1589         case 0270:
1590             codes += 2;
1591             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1592                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1593                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1594                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1595                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1596                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1597                 offset += 3;
1598             } else {
1599                 bytes[0] = 0xc5;
1600                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1601                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1602                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1603                 offset += 2;
1604             }
1605             break;
1606
1607         case4(0274):
1608         {
1609             uint64_t uv, um;
1610             int s;
1611
1612             if (ins->rex & REX_W)
1613                 s = 64;
1614             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1615                 s = 16;
1616             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1617                 s = 32;
1618             else
1619                 s = bits;
1620
1621             um = (uint64_t)2 << (s-1);
1622             uv = opx->offset;
1623
1624             if (uv > 127 && uv < (uint64_t)-128 &&
1625                 (uv < um-128 || uv > um-1)) {
1626                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1627                         "signed byte value exceeds bounds");
1628             }
1629             if (opx->segment != NO_SEG) {
1630                 data = uv;
1631                 out(offset, segment, &data, OUT_ADDRESS, 1,
1632                     opx->segment, opx->wrt);
1633             } else {
1634                 bytes[0] = uv;
1635                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1636                     NO_SEG);
1637             }
1638             offset += 1;
1639             break;
1640         }
1641
1642         case4(0300):
1643             break;
1644
1645         case 0310:
1646             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1647                 *bytes = 0x67;
1648                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1649                 offset += 1;
1650             } else
1651                 offset += 0;
1652             break;
1653
1654         case 0311:
1655             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1656                 *bytes = 0x67;
1657                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1658                 offset += 1;
1659             } else
1660                 offset += 0;
1661             break;
1662
1663         case 0312:
1664             break;
1665
1666         case 0313:
1667             ins->rex = 0;
1668             break;
1669
1670         case4(0314):
1671             break;
1672
1673         case 0320:
1674             if (bits != 16) {
1675                 *bytes = 0x66;
1676                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1677                 offset += 1;
1678             } else
1679                 offset += 0;
1680             break;
1681
1682         case 0321:
1683             if (bits == 16) {
1684                 *bytes = 0x66;
1685                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1686                 offset += 1;
1687             } else
1688                 offset += 0;
1689             break;
1690
1691         case 0322:
1692         case 0323:
1693             break;
1694
1695         case 0324:
1696             ins->rex |= REX_W;
1697             break;
1698
1699         case 0325:
1700             break;
1701
1702         case 0330:
1703             *bytes = *codes++ ^ condval[ins->condition];
1704             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1705             offset += 1;
1706             break;
1707
1708         case 0331:
1709             break;
1710
1711         case 0332:
1712         case 0333:
1713             *bytes = c - 0332 + 0xF2;
1714             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1715             offset += 1;
1716             break;
1717
1718         case 0334:
1719             if (ins->rex & REX_R) {
1720                 *bytes = 0xF0;
1721                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1722                 offset += 1;
1723             }
1724             ins->rex &= ~(REX_L|REX_R);
1725             break;
1726
1727         case 0335:
1728             break;
1729
1730         case 0336:
1731         case 0337:
1732             break;
1733
1734         case 0340:
1735             if (ins->oprs[0].segment != NO_SEG)
1736                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1737             else {
1738                 int64_t size = ins->oprs[0].offset;
1739                 if (size > 0)
1740                     out(offset, segment, NULL,
1741                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1742                 offset += size;
1743             }
1744             break;
1745
1746         case 0341:
1747             break;
1748
1749         case 0344:
1750         case 0345:
1751             bytes[0] = c & 1;
1752             switch (ins->oprs[0].basereg) {
1753             case R_CS:
1754                 bytes[0] += 0x0E;
1755                 break;
1756             case R_DS:
1757                 bytes[0] += 0x1E;
1758                 break;
1759             case R_ES:
1760                 bytes[0] += 0x06;
1761                 break;
1762             case R_SS:
1763                 bytes[0] += 0x16;
1764                 break;
1765             default:
1766                 errfunc(ERR_PANIC,
1767                         "bizarre 8086 segment register received");
1768             }
1769             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1770             offset++;
1771             break;
1772
1773         case 0346:
1774         case 0347:
1775             bytes[0] = c & 1;
1776             switch (ins->oprs[0].basereg) {
1777             case R_FS:
1778                 bytes[0] += 0xA0;
1779                 break;
1780             case R_GS:
1781                 bytes[0] += 0xA8;
1782                 break;
1783             default:
1784                 errfunc(ERR_PANIC,
1785                         "bizarre 386 segment register received");
1786             }
1787             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1788             offset++;
1789             break;
1790
1791         case 0360:
1792             break;
1793
1794         case 0361:
1795             bytes[0] = 0x66;
1796             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1797             offset += 1;
1798             break;
1799
1800         case 0362:
1801         case 0363:
1802             bytes[0] = c - 0362 + 0xf2;
1803             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1804             offset += 1;
1805             break;
1806
1807         case 0364:
1808         case 0365:
1809             break;
1810
1811         case 0366:
1812         case 0367:
1813             *bytes = c - 0366 + 0x66;
1814             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1815             offset += 1;
1816             break;
1817
1818         case 0370:
1819         case 0371:
1820         case 0372:
1821             break;
1822
1823         case 0373:
1824             *bytes = bits == 16 ? 3 : 5;
1825             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1826             offset += 1;
1827             break;
1828
1829         case4(0100):
1830         case4(0110):
1831         case4(0120):
1832         case4(0130):
1833         case4(0200):
1834         case4(0204):
1835         case4(0210):
1836         case4(0214):
1837         case4(0220):
1838         case4(0224):
1839         case4(0230):
1840         case4(0234):
1841             {
1842                 ea ea_data;
1843                 int rfield;
1844                 int32_t rflags;
1845                 uint8_t *p;
1846                 int32_t s;
1847                 enum out_type type;
1848                 struct operand *opy = &ins->oprs[op2];
1849
1850                 if (c <= 0177) {
1851                     /* pick rfield from operand b (opx) */
1852                     rflags = regflag(opx);
1853                     rfield = nasm_regvals[opx->basereg];
1854                 } else {
1855                     /* rfield is constant */
1856                     rflags = 0;
1857                     rfield = c & 7;
1858                 }
1859
1860                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1861                                 rfield, rflags)) {
1862                     errfunc(ERR_NONFATAL, "invalid effective address");
1863                 }
1864
1865
1866                 p = bytes;
1867                 *p++ = ea_data.modrm;
1868                 if (ea_data.sib_present)
1869                     *p++ = ea_data.sib;
1870
1871                 /* DREX suffixes come between the SIB and the displacement */
1872                 if (ins->rex & REX_D) {
1873                     *p++ = (ins->drexdst << 4) |
1874                            (ins->rex & REX_OC ? 0x08 : 0) |
1875                            (ins->rex & (REX_R|REX_X|REX_B));
1876                     ins->rex = 0;
1877                 }
1878
1879                 s = p - bytes;
1880                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1881
1882                 /*
1883                  * Make sure the address gets the right offset in case
1884                  * the line breaks in the .lst file (BR 1197827)
1885                  */
1886                 offset += s;
1887                 s = 0;
1888
1889                 switch (ea_data.bytes) {
1890                 case 0:
1891                     break;
1892                 case 1:
1893                 case 2:
1894                 case 4:
1895                 case 8:
1896                     data = opy->offset;
1897                     warn_overflow(ea_data.bytes, opy);
1898                     s += ea_data.bytes;
1899                     if (ea_data.rip) {
1900                         if (opy->segment == segment) {
1901                             data -= insn_end;
1902                             out(offset, segment, &data, OUT_ADDRESS,
1903                                 ea_data.bytes, NO_SEG, NO_SEG);
1904                         } else {
1905                             out(offset, segment, &data, OUT_REL4ADR,
1906                                 insn_end - offset, opy->segment, opy->wrt);
1907                         }
1908                     } else {
1909                         type = OUT_ADDRESS;
1910                         out(offset, segment, &data, OUT_ADDRESS,
1911                             ea_data.bytes, opy->segment, opy->wrt);
1912                     }
1913                     break;
1914                 default:
1915                     /* Impossible! */
1916                     errfunc(ERR_PANIC,
1917                             "Invalid amount of bytes (%d) for offset?!",
1918                             ea_data.bytes);
1919                     break;
1920                 }
1921                 offset += s;
1922             }
1923             break;
1924
1925         default:
1926             errfunc(ERR_PANIC, "internal instruction table corrupt"
1927                     ": instruction code \\%o (0x%02X) given", c, c);
1928             break;
1929         }
1930     }
1931 }
1932
1933 static int32_t regflag(const operand * o)
1934 {
1935     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1936         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1937     }
1938     return nasm_reg_flags[o->basereg];
1939 }
1940
1941 static int32_t regval(const operand * o)
1942 {
1943     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1944         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1945     }
1946     return nasm_regvals[o->basereg];
1947 }
1948
1949 static int op_rexflags(const operand * o, int mask)
1950 {
1951     int32_t flags;
1952     int val;
1953
1954     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1955         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1956     }
1957
1958     flags = nasm_reg_flags[o->basereg];
1959     val = nasm_regvals[o->basereg];
1960
1961     return rexflags(val, flags, mask);
1962 }
1963
1964 static int rexflags(int val, int32_t flags, int mask)
1965 {
1966     int rex = 0;
1967
1968     if (val >= 8)
1969         rex |= REX_B|REX_X|REX_R;
1970     if (flags & BITS64)
1971         rex |= REX_W;
1972     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1973         rex |= REX_H;
1974     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1975         rex |= REX_P;
1976
1977     return rex & mask;
1978 }
1979
1980 static enum match_result find_match(const struct itemplate **tempp,
1981                                     insn *instruction,
1982                                     int32_t segment, int64_t offset, int bits)
1983 {
1984     const struct itemplate *temp;
1985     enum match_result m, merr;
1986     int32_t xsizeflags[MAX_OPERANDS];
1987     bool opsizemissing = false;
1988     int i;
1989
1990     for (i = 0; i < instruction->operands; i++)
1991         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1992
1993     merr = MERR_INVALOP;
1994
1995     for (temp = nasm_instructions[instruction->opcode];
1996          temp->opcode != I_none; temp++) {
1997         m = matches(temp, instruction, bits);
1998         if (m == MOK_JUMP) {
1999             if (jmp_match(segment, offset, bits, instruction, temp->code))
2000                 m = MOK_GOOD;
2001             else
2002                 m = MERR_INVALOP;
2003         } else if (m == MERR_OPSIZEMISSING &&
2004                    (temp->flags & IF_SMASK) != IF_SX) {
2005             /*
2006              * Missing operand size and a candidate for fuzzy matching...
2007              */
2008             for (i = 0; i < temp->operands; i++)
2009                 xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
2010
2011             opsizemissing = true;
2012         }
2013         if (m > merr)
2014             merr = m;
2015         if (merr == MOK_GOOD)
2016             goto done;
2017     }
2018
2019     /* No match, but see if we can get a fuzzy operand size match... */
2020     if (!opsizemissing)
2021         goto done;
2022
2023     for (i = 0; i < instruction->operands; i++) {
2024         /* This tests if xsizeflags[i] has more than one bit set */
2025         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2026             goto done;          /* No luck */
2027
2028         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2029     }
2030
2031     /* Try matching again... */
2032     for (temp = nasm_instructions[instruction->opcode];
2033          temp->opcode != I_none; temp++) {
2034         m = matches(temp, instruction, bits);
2035         if (m == MOK_JUMP) {
2036             if (jmp_match(segment, offset, bits, instruction, temp->code))
2037                 m = MOK_GOOD;
2038             else
2039                 m = MERR_INVALOP;
2040         }
2041         if (m > merr)
2042             merr = m;
2043         if (merr == MOK_GOOD)
2044             goto done;
2045     }
2046
2047 done:
2048     *tempp = temp;
2049     return merr;
2050 }
2051
2052 static enum match_result matches(const struct itemplate *itemp,
2053                                  insn *instruction, int bits)
2054 {
2055     int i, size[MAX_OPERANDS], asize, oprs;
2056     bool opsizemissing = false;
2057
2058     /*
2059      * Check the opcode
2060      */
2061     if (itemp->opcode != instruction->opcode)
2062         return MERR_INVALOP;
2063
2064     /*
2065      * Count the operands
2066      */
2067     if (itemp->operands != instruction->operands)
2068         return MERR_INVALOP;
2069
2070     /*
2071      * Check that no spurious colons or TOs are present
2072      */
2073     for (i = 0; i < itemp->operands; i++)
2074         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2075             return MERR_INVALOP;
2076
2077     /*
2078      * Process size flags
2079      */
2080     switch (itemp->flags & IF_SMASK) {
2081     case IF_SB:
2082         asize = BITS8;
2083         break;
2084     case IF_SW:
2085         asize = BITS16;
2086         break;
2087     case IF_SD:
2088         asize = BITS32;
2089         break;
2090     case IF_SQ:
2091         asize = BITS64;
2092         break;
2093     case IF_SO:
2094         asize = BITS128;
2095         break;
2096     case IF_SY:
2097         asize = BITS256;
2098         break;
2099     case IF_SZ:
2100         switch (bits) {
2101         case 16:
2102             asize = BITS16;
2103             break;
2104         case 32:
2105             asize = BITS32;
2106             break;
2107         case 64:
2108             asize = BITS64;
2109             break;
2110         default:
2111             asize = 0;
2112             break;
2113         }
2114         break;
2115     default:
2116         asize = 0;
2117         break;
2118     }
2119
2120     if (itemp->flags & IF_ARMASK) {
2121         /* S- flags only apply to a specific operand */
2122         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2123         memset(size, 0, sizeof size);
2124         size[i] = asize;
2125     } else {
2126         /* S- flags apply to all operands */
2127         for (i = 0; i < MAX_OPERANDS; i++)
2128             size[i] = asize;
2129     }
2130
2131     /*
2132      * Check that the operand flags all match up
2133      */
2134     for (i = 0; i < itemp->operands; i++) {
2135         int32_t type = instruction->oprs[i].type;
2136         if (!(type & SIZE_MASK))
2137             type |= size[i];
2138
2139         if (itemp->opd[i] & SAME_AS) {
2140             int j = itemp->opd[i] & ~SAME_AS;
2141             if (type != instruction->oprs[j].type ||
2142                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2143                 return MERR_INVALOP;
2144         } else if (itemp->opd[i] & ~type ||
2145             ((itemp->opd[i] & SIZE_MASK) &&
2146              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2147             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK))
2148                 return MERR_INVALOP;
2149             else
2150                 opsizemissing = true;
2151         }
2152     }
2153
2154     if (opsizemissing)
2155         return MERR_OPSIZEMISSING;
2156
2157     /*
2158      * Check operand sizes
2159      */
2160     if (itemp->flags & (IF_SM | IF_SM2)) {
2161         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2162         asize = 0;
2163         for (i = 0; i < oprs; i++) {
2164             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2165                 int j;
2166                 for (j = 0; j < oprs; j++)
2167                     size[j] = asize;
2168                 break;
2169             }
2170         }
2171     } else {
2172         oprs = itemp->operands;
2173     }
2174
2175     for (i = 0; i < itemp->operands; i++) {
2176         if (!(itemp->opd[i] & SIZE_MASK) &&
2177             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2178             return MERR_OPSIZEMISMATCH;
2179     }
2180
2181     /*
2182      * Check template is okay at the set cpu level
2183      */
2184     if (((itemp->flags & IF_PLEVEL) > cpu))
2185         return MERR_BADCPU;
2186
2187     /*
2188      * Verify the appropriate long mode flag.
2189      */
2190     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2191         return MERR_BADMODE;
2192
2193     /*
2194      * Check if special handling needed for Jumps
2195      */
2196     if ((itemp->code[0] & 0374) == 0370)
2197         return MOK_JUMP;
2198
2199     return MOK_GOOD;
2200 }
2201
/*
 * Encode one operand as an effective address: the ModRM byte, an
 * optional SIB byte and the number of displacement bytes.
 *
 *   input    - operand to encode, either a register or a memory reference
 *   output   - receives modrm, sib, sib_present, bytes, rip and size;
 *              REX flags are only OR-ed into output->rex and never
 *              cleared here, so the caller owns its initial value
 *   bits     - current mode; only compared against 64 in this function
 *   addrbits - address size in effect (16, 32 or 64)
 *   rfield   - value placed in the reg field (bits 5..3) of ModRM
 *   rflags   - register flags belonging to rfield, used for REX bits
 *
 * Returns output on success, or NULL when the operand cannot be
 * expressed as an effective address.
 */
static ea *process_ea(operand * input, ea * output, int bits,
                      int addrbits, int rfield, int32_t rflags)
{
    /*
     * OPFLAG_UNKNOWN set on the operand (presumably a forward reference
     * whose value is not final yet); when set, neither the
     * no-displacement nor the byte-displacement form is chosen on the
     * strength of the offset value alone.
     */
    bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);

    output->rip = false;

    /* REX flags for the rfield operand */
    output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);

    if (!(REGISTER & ~input->type)) {   /* register direct */
        int i;
        int32_t f;

        if (input->basereg < EXPR_REG_START /* Verify as Register */
            || input->basereg >= REG_ENUM_LIMIT)
            return NULL;
        f = regflag(input);
        i = nasm_regvals[input->basereg];

        if (REG_EA & ~f)
            return NULL;        /* Invalid EA register */

        output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);

        output->sib_present = false;             /* no SIB necessary */
        output->bytes = 0;  /* no offset necessary either */
        /* mod = 3 (0xC0): the r/m field names a register */
        output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
    } else {                    /* it's a memory reference */
        if (input->basereg == -1
            && (input->indexreg == -1 || input->scale == 0)) {
            /* it's a pure offset */
            if (bits == 64 && (~input->type & IP_REL)) {
              /*
               * 64-bit mode and the operand type does not carry all of
               * IP_REL: emit a SIB byte with index = 4 and base = 5
               * followed by a 4-byte displacement.
               */
              int scale, index, base;
              output->sib_present = true;
              scale = 0;
              index = 4;
              base = 5;
              output->sib = (scale << 6) | (index << 3) | base;
              output->bytes = 4;
              output->modrm = 4 | ((rfield & 7) << 3);
              output->rip = false;
            } else {
              /*
               * mod = 0 with r/m = 5 and a 4-byte displacement, or
               * r/m = 6 and a 2-byte displacement for 16-bit
               * addressing.  In 64-bit mode this form is flagged as
               * RIP-relative.
               */
              output->sib_present = false;
              output->bytes = (addrbits != 16 ? 4 : 2);
              output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
              output->rip = bits == 64;
            }
        } else {                /* it's an indirection */
            int i = input->indexreg, b = input->basereg, s = input->scale;
            int32_t o = input->offset, seg = input->segment;
            int hb = input->hintbase, ht = input->hinttype;
            int t;
            int it, bt;         /* register values of index and base, -1 if none */
            int32_t ix, bx;     /* register flags */

            if (s == 0)
                i = -1;         /* make this easy, at least */

            if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
                it = nasm_regvals[i];
                ix = nasm_reg_flags[i];
            } else {
                it = -1;
                ix = 0;
            }

            if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
                bt = nasm_regvals[b];
                bx = nasm_reg_flags[b];
            } else {
                bt = -1;
                bx = 0;
            }

            /* check for a 32/64-bit memory reference... */
            if ((ix|bx) & (BITS32|BITS64)) {
                /* it must be a 32/64-bit memory reference. Firstly we have
                 * to check that all registers involved are type E/Rxx. */
                int32_t sok = BITS32|BITS64;    /* address sizes still possible */

                if (it != -1) {
                    if (!(REG64 & ~ix) || !(REG32 & ~ix))
                        sok &= ix;
                    else
                        return NULL;
                }

                if (bt != -1) {
                    if (REG_GPR & ~bx)
                        return NULL; /* Invalid register */
                    if (~sok & bx & SIZE_MASK)
                        return NULL; /* Invalid size */
                    sok &= bx;
                }

                /* While we're here, ensure the user didn't specify
                   WORD or QWORD. */
                if (input->disp_size == 16 || input->disp_size == 64)
                    return NULL;

                /* the registers must fit the address size in effect */
                if (addrbits == 16 ||
                    (addrbits == 32 && !(sok & BITS32)) ||
                    (addrbits == 64 && !(sok & BITS64)))
                    return NULL;

                /* now reorganize base/index */
                if (s == 1 && bt != it && bt != -1 && it != -1 &&
                    ((hb == b && ht == EAH_NOTBASE)
                     || (hb == i && ht == EAH_MAKEBASE))) {
                    /* swap if hints say so */
                    t = bt, bt = it, it = t;
                    t = bx, bx = ix, ix = t;
                }
                if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
                    bt = -1, bx = 0, s++;
                if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
                    /* make single reg base, unless hint */
                    bt = it, bx = ix, it = -1, ix = 0;
                }
                if (((s == 2 && it != REG_NUM_ESP
                      && !(input->eaflags & EAF_TIMESTWO)) || s == 3
                     || s == 5 || s == 9) && bt == -1)
                    bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
                /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
                if (it == -1 && (bt & 7) != REG_NUM_ESP
                    && (input->eaflags & EAF_TIMESTWO))
                    it = bt, ix = bx, bt = -1, bx = 0, s = 1;
                if (s == 1 && it == REG_NUM_ESP) {
                    /* swap ESP into base if scale is 1 */
                    t = it, it = bt, bt = t;
                    t = ix, ix = bx, bx = t;
                }
                if (it == REG_NUM_ESP
                    || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
                    return NULL;        /* wrong, for various reasons */

                output->rex |= rexflags(it, ix, REX_X);
                output->rex |= rexflags(bt, bx, REX_B);

                if (it == -1 && (bt & 7) != REG_NUM_ESP) {
                    /* no SIB needed */
                    int mod, rm;

                    if (bt == -1) {
                        /* no base: r/m = 5 with mod = 0, 4-byte displacement */
                        rm = 5;
                        mod = 0;
                    } else {
                        rm = (bt & 7);
                        /*
                         * mod 0 = no displacement, mod 1 = one byte,
                         * mod 2 = four bytes.  r/m == REG_NUM_EBP is
                         * kept out of mod 0 and always gets a
                         * displacement.
                         */
                        if (rm != REG_NUM_EBP && o == 0 &&
                                seg == NO_SEG && !forw_ref &&
                                !(input->eaflags &
                                  (EAF_BYTEOFFS | EAF_WORDOFFS)))
                            mod = 0;
                        else if (input->eaflags & EAF_BYTEOFFS ||
                                 (o >= -128 && o <= 127 && seg == NO_SEG
                                  && !forw_ref
                                  && !(input->eaflags & EAF_WORDOFFS)))
                            mod = 1;
                        else
                            mod = 2;
                    }

                    output->sib_present = false;
                    output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
                    output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
                } else {
                    /* we need a SIB */
                    int mod, scale, index, base;

                    /* index field 4 is used when there is no index register */
                    if (it == -1)
                        index = 4, s = 1;
                    else
                        index = (it & 7);

                    /* the SIB scale field holds log2 of the multiplier */
                    switch (s) {
                    case 1:
                        scale = 0;
                        break;
                    case 2:
                        scale = 1;
                        break;
                    case 4:
                        scale = 2;
                        break;
                    case 8:
                        scale = 3;
                        break;
                    default:   /* then what the smeg is it? */
                        return NULL;    /* panic */
                    }

                    if (bt == -1) {
                        /* no base: base = 5 with mod = 0, 4-byte displacement */
                        base = 5;
                        mod = 0;
                    } else {
                        base = (bt & 7);
                        /* same mod selection as in the no-SIB case above */
                        if (base != REG_NUM_EBP && o == 0 &&
                                    seg == NO_SEG && !forw_ref &&
                                    !(input->eaflags &
                                      (EAF_BYTEOFFS | EAF_WORDOFFS)))
                            mod = 0;
                        else if (input->eaflags & EAF_BYTEOFFS ||
                                 (o >= -128 && o <= 127 && seg == NO_SEG
                                  && !forw_ref
                                  && !(input->eaflags & EAF_WORDOFFS)))
                            mod = 1;
                        else
                            mod = 2;
                    }

                    output->sib_present = true;
                    output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
                    /* r/m = 4 announces the SIB byte */
                    output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
                    output->sib = (scale << 6) | (index << 3) | base;
                }
            } else {            /* it's 16-bit */
                int mod, rm;

                /* check for 64-bit long mode */
                if (addrbits == 64)
                    return NULL;

                /* check all registers are BX, BP, SI or DI */
                if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
                     && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
                                       && i != R_SI && i != R_DI))
                    return NULL;

                /* ensure the user didn't specify DWORD/QWORD */
                if (input->disp_size == 32 || input->disp_size == 64)
                    return NULL;

                if (s != 1 && i != -1)
                    return NULL;        /* no can do, in 16-bit EA */
                if (b == -1 && i != -1) {
                    int tmp = b;
                    b = i;
                    i = tmp;
                }               /* swap */
                if ((b == R_SI || b == R_DI) && i != -1) {
                    int tmp = b;
                    b = i;
                    i = tmp;
                }
                /* have BX/BP as base, SI/DI index */
                if (b == i)
                    return NULL;        /* shouldn't ever happen, in theory */
                if (i != -1 && b != -1 &&
                    (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
                    return NULL;        /* invalid combinations */
                if (b == -1)    /* pure offset: handled above */
                    return NULL;        /* so if it gets to here, panic! */

                /* map the base/index pair to the 16-bit r/m encoding */
                rm = -1;
                if (i != -1)
                    switch (i * 256 + b) {
                    case R_SI * 256 + R_BX:     /* [BX+SI] */
                        rm = 0;
                        break;
                    case R_DI * 256 + R_BX:     /* [BX+DI] */
                        rm = 1;
                        break;
                    case R_SI * 256 + R_BP:     /* [BP+SI] */
                        rm = 2;
                        break;
                    case R_DI * 256 + R_BP:     /* [BP+DI] */
                        rm = 3;
                        break;
                } else
                    switch (b) {
                    case R_SI:
                        rm = 4;
                        break;
                    case R_DI:
                        rm = 5;
                        break;
                    case R_BP:
                        rm = 6;
                        break;
                    case R_BX:
                        rm = 7;
                        break;
                    }
                if (rm == -1)   /* can't happen, in theory */
                    return NULL;        /* so panic if it does */

                /*
                 * mod 0 = no displacement, mod 1 = one byte, mod 2 = two
                 * bytes.  r/m = 6 (BP alone) is kept out of mod 0: that
                 * encoding is what the pure-offset case above produces.
                 */
                if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
                    !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
                    mod = 0;
                else if (input->eaflags & EAF_BYTEOFFS ||
                         (o >= -128 && o <= 127 && seg == NO_SEG
                          && !forw_ref
                          && !(input->eaflags & EAF_WORDOFFS)))
                    mod = 1;
                else
                    mod = 2;

                output->sib_present = false;    /* no SIB - it's 16-bit */
                output->bytes = mod;    /* bytes of offset needed */
                output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
            }
        }
    }

    /* ModRM byte + optional SIB byte + displacement bytes */
    output->size = 1 + output->sib_present + output->bytes;
    return output;
}
2510
2511 static void add_asp(insn *ins, int addrbits)
2512 {
2513     int j, valid;
2514     int defdisp;
2515
2516     valid = (addrbits == 64) ? 64|32 : 32|16;
2517
2518     switch (ins->prefixes[PPS_ASIZE]) {
2519     case P_A16:
2520         valid &= 16;
2521         break;
2522     case P_A32:
2523         valid &= 32;
2524         break;
2525     case P_A64:
2526         valid &= 64;
2527         break;
2528     case P_ASP:
2529         valid &= (addrbits == 32) ? 16 : 32;
2530         break;
2531     default:
2532         break;
2533     }
2534
2535     for (j = 0; j < ins->operands; j++) {
2536         if (!(MEMORY & ~ins->oprs[j].type)) {
2537             int32_t i, b;
2538
2539             /* Verify as Register */
2540             if (ins->oprs[j].indexreg < EXPR_REG_START
2541                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2542                 i = 0;
2543             else
2544                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2545
2546             /* Verify as Register */
2547             if (ins->oprs[j].basereg < EXPR_REG_START
2548                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2549                 b = 0;
2550             else
2551                 b = nasm_reg_flags[ins->oprs[j].basereg];
2552
2553             if (ins->oprs[j].scale == 0)
2554                 i = 0;
2555
2556             if (!i && !b) {
2557                 int ds = ins->oprs[j].disp_size;
2558                 if ((addrbits != 64 && ds > 8) ||
2559                     (addrbits == 64 && ds == 16))
2560                     valid &= ds;
2561             } else {
2562                 if (!(REG16 & ~b))
2563                     valid &= 16;
2564                 if (!(REG32 & ~b))
2565                     valid &= 32;
2566                 if (!(REG64 & ~b))
2567                     valid &= 64;
2568
2569                 if (!(REG16 & ~i))
2570                     valid &= 16;
2571                 if (!(REG32 & ~i))
2572                     valid &= 32;
2573                 if (!(REG64 & ~i))
2574                     valid &= 64;
2575             }
2576         }
2577     }
2578
2579     if (valid & addrbits) {
2580         ins->addr_size = addrbits;
2581     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2582         /* Add an address size prefix */
2583         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2584         ins->prefixes[PPS_ASIZE] = pref;
2585         ins->addr_size = (addrbits == 32) ? 16 : 32;
2586     } else {
2587         /* Impossible... */
2588         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2589         ins->addr_size = addrbits; /* Error recovery */
2590     }
2591
2592     defdisp = ins->addr_size == 16 ? 16 : 32;
2593
2594     for (j = 0; j < ins->operands; j++) {
2595         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2596             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2597             != ins->addr_size) {
2598             /* mem_offs sizes must match the address size; if not,
2599                strip the MEM_OFFS bit and match only EA instructions */
2600             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2601         }
2602     }
2603 }