assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 0ww lpp
  96  *                 [w0] ww = 0 for W = 0
  97  *                 [w1] ww = 1 for W = 1
  98  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  99  *                 [ww] ww = 3 for W used as REX.W
 100  *
 101  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 102  *
 103  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 104  *                 which is to be extended to the operand size.
 105  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 106  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 107  * \312          - (disassembler only) invalid with non-default address size.
 108  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 109  * \314          - (disassembler only) invalid with REX.B
 110  * \315          - (disassembler only) invalid with REX.X
 111  * \316          - (disassembler only) invalid with REX.R
 112  * \317          - (disassembler only) invalid with REX.W
 113  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 114  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 115  * \322          - indicates that this instruction is only valid when the
 116  *                 operand size is the default (instruction to disassembler,
 117  *                 generates no code in the assembler)
 118  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 119  * \324          - indicates 64-bit operand size requiring REX prefix.
 120  * \325          - instruction which always uses spl/bpl/sil/dil
 121  * \330          - a literal byte follows in the code stream, to be added
 122  *                 to the condition code value of the instruction.
 123  * \331          - instruction not valid with REP prefix.  Hint for
 124  *                 disassembler only; for SSE instructions.
 125  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 126  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 127  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 128  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
 129  * \336          - force a REP(E) prefix (0xF2) even if not specified.
 130  * \337          - force a REPNE prefix (0xF3) even if not specified.
 131  *                 \336-\337 are still listed as prefixes in the disassembler.
 132  * \340          - reserve <operand 0> bytes of uninitialized storage.
 133  *                 Operand 0 had better be a segmentless constant.
 134  * \341          - this instruction needs a WAIT "prefix"
 135  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 136  *                 (POP is never used for CS) depending on operand 0
 137  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 138  *                 on operand 0
 139  * \360          - no SSE prefix (== \364\331)
 140  * \361          - 66 SSE prefix (== \366\331)
 141  * \362          - F2 SSE prefix (== \364\332)
 142  * \363          - F3 SSE prefix (== \364\333)
 143  * \364          - operand-size prefix (0x66) not permitted
 144  * \365          - address-size prefix (0x67) not permitted
 145  * \366          - operand-size prefix (0x66) used as opcode extension
 146  * \367          - address-size prefix (0x67) used as opcode extension
 147  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 148  *                 370 is used for Jcc, 371 is used for JMP.
 149  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 150  *                 used for conditional jump over longer jump
 151  */
 152
 153 #include "compiler.h"
 154
 155 #include <stdio.h>
 156 #include <string.h>
 157 #include <inttypes.h>
 158
 159 #include "nasm.h"
 160 #include "nasmlib.h"
 161 #include "assemble.h"
 162 #include "insns.h"
 163 #include "tables.h"
 164
 165 typedef struct {
 166     int sib_present;                 /* is a SIB byte necessary? */
 167     int bytes;                       /* # of bytes of offset needed */
 168     int size;                        /* lazy - this is sib+bytes+1 */
 169     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 170 } ea;
 171
 172 static uint32_t cpu;            /* cpu level received from nasm.c */
 173 static efunc errfunc;
 174 static struct ofmt *outfmt;
 175 static ListGen *list;
 176
 177 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 178 static void gencode(int32_t segment, int64_t offset, int bits,
 179                     insn * ins, const struct itemplate *temp,
 180                     int64_t insn_end);
 181 static int matches(const struct itemplate *, insn *, int bits);
 182 static int32_t regflag(const operand *);
 183 static int32_t regval(const operand *);
 184 static int rexflags(int, int32_t, int);
 185 static int op_rexflags(const operand *, int);
 186 static ea *process_ea(operand *, ea *, int, int, int, int32_t);
 187 static void add_asp(insn *, int);
 188
 189 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 190 {
 191     return ins->prefixes[pos] == prefix;
 192 }
 193
 194 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 195 {
 196     if (ins->prefixes[pos])
 197         errfunc(ERR_NONFATAL, "invalid %s prefix",
 198                 prefix_name(ins->prefixes[pos]));
 199 }
 200
 201 static const char *size_name(int size)
 202 {
 203     switch (size) {
 204     case 1:
 205         return "byte";
 206     case 2:
 207         return "word";
 208     case 4:
 209         return "dword";
 210     case 8:
 211         return "qword";
 212     case 10:
 213         return "tword";
 214     case 16:
 215         return "oword";
 216     case 32:
 217         return "yword";
 218     default:
 219         return "???";
 220     }
 221 }
 222
 223 static void warn_overflow(int size, const struct operand *o)
 224 {
 225     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 226         int64_t lim = ((int64_t)1 << (size*8))-1;
 227         int64_t data = o->offset;
 228
 229         if (data < ~lim || data > lim)
 230             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 231                     "%s data exceeds bounds", size_name(size));
 232     }
 233 }
 234 /*
 235  * This routine wrappers the real output format's output routine,
 236  * in order to pass a copy of the data off to the listing file
 237  * generator at the same time.
 238  */
 239 static void out(int64_t offset, int32_t segto, const void *data,
 240                 enum out_type type, uint64_t size,
 241                 int32_t segment, int32_t wrt)
 242 {
 243     static int32_t lineno = 0;     /* static!!! */
 244     static char *lnfname = NULL;
 245     uint8_t p[8];
 246
 247     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 248         /*
 249          * This is a non-relocated address, and we're going to
 250          * convert it into RAWDATA format.
 251          */
 252         uint8_t *q = p;
 253
 254         if (size > 8) {
 255             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 256             return;
 257         }
 258
 259         WRITEADDR(q, *(int64_t *)data, size);
 260         data = p;
 261         type = OUT_RAWDATA;
 262     }
 263
 264     list->output(offset, data, type, size);
 265
 266     /*
 267      * this call to src_get determines when we call the
 268      * debug-format-specific "linenum" function
 269      * it updates lineno and lnfname to the current values
 270      * returning 0 if "same as last time", -2 if lnfname
 271      * changed, and the amount by which lineno changed,
 272      * if it did. thus, these variables must be static
 273      */
 274
 275     if (src_get(&lineno, &lnfname)) {
 276         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 277     }
 278
 279     outfmt->output(segto, data, type, size, segment, wrt);
 280 }
 281
 282 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 283                      insn * ins, const uint8_t *code)
 284 {
 285     int64_t isize;
 286     uint8_t c = code[0];
 287
 288     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 289         return false;
 290     if (!optimizing)
 291         return false;
 292     if (optimizing < 0 && c == 0371)
 293         return false;
 294
 295     isize = calcsize(segment, offset, bits, ins, code);
 296
 297     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 298         /* Be optimistic in pass 1 */
 299         return true;
 300
 301     if (ins->oprs[0].segment != segment)
 302         return false;
 303
 304     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 305     return (isize >= -128 && isize <= 127); /* is it byte size? */
 306 }
 307
 308 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 309               insn * instruction, struct ofmt *output, efunc error,
 310               ListGen * listgen)
 311 {
 312     const struct itemplate *temp;
 313     int j;
 314     int size_prob;
 315     int64_t insn_end;
 316     int32_t itimes;
 317     int64_t start = offset;
 318     int64_t wsize = 0;             /* size for DB etc. */
 319
 320     errfunc = error;            /* to pass to other functions */
 321     cpu = cp;
 322     outfmt = output;            /* likewise */
 323     list = listgen;             /* and again */
 324
 325     switch (instruction->opcode) {
 326     case -1:
 327         return 0;
 328     case I_DB:
 329         wsize = 1;
 330         break;
 331     case I_DW:
 332         wsize = 2;
 333         break;
 334     case I_DD:
 335         wsize = 4;
 336         break;
 337     case I_DQ:
 338         wsize = 8;
 339         break;
 340     case I_DT:
 341         wsize = 10;
 342         break;
 343     case I_DO:
 344         wsize = 16;
 345         break;
 346     case I_DY:
 347         wsize = 32;
 348         break;
 349     default:
 350         break;
 351     }
 352
 353     if (wsize) {
 354         extop *e;
 355         int32_t t = instruction->times;
 356         if (t < 0)
 357             errfunc(ERR_PANIC,
 358                     "instruction->times < 0 (%ld) in assemble()", t);
 359
 360         while (t--) {           /* repeat TIMES times */
 361             for (e = instruction->eops; e; e = e->next) {
 362                 if (e->type == EOT_DB_NUMBER) {
 363                     if (wsize == 1) {
 364                         if (e->segment != NO_SEG)
 365                             errfunc(ERR_NONFATAL,
 366                                     "one-byte relocation attempted");
 367                         else {
 368                             uint8_t out_byte = e->offset;
 369                             out(offset, segment, &out_byte,
 370                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 371                         }
 372                     } else if (wsize > 8) {
 373                         errfunc(ERR_NONFATAL,
 374                                 "integer supplied to a DT, DO or DY"
 375                                 " instruction");
 376                     } else
 377                         out(offset, segment, &e->offset,
 378                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 379                     offset += wsize;
 380                 } else if (e->type == EOT_DB_STRING ||
 381                            e->type == EOT_DB_STRING_FREE) {
 382                     int align;
 383
 384                     out(offset, segment, e->stringval,
 385                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 386                     align = e->stringlen % wsize;
 387
 388                     if (align) {
 389                         align = wsize - align;
 390                         out(offset, segment, zero_buffer,
 391                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 392                     }
 393                     offset += e->stringlen + align;
 394                 }
 395             }
 396             if (t > 0 && t == instruction->times - 1) {
 397                 /*
 398                  * Dummy call to list->output to give the offset to the
 399                  * listing module.
 400                  */
 401                 list->output(offset, NULL, OUT_RAWDATA, 0);
 402                 list->uplevel(LIST_TIMES);
 403             }
 404         }
 405         if (instruction->times > 1)
 406             list->downlevel(LIST_TIMES);
 407         return offset - start;
 408     }
 409
 410     if (instruction->opcode == I_INCBIN) {
 411         const char *fname = instruction->eops->stringval;
 412         FILE *fp;
 413
 414         fp = fopen(fname, "rb");
 415         if (!fp) {
 416             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 417                   fname);
 418         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 419             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 420                   fname);
 421         } else {
 422             static char buf[4096];
 423             size_t t = instruction->times;
 424             size_t base = 0;
 425             size_t len;
 426
 427             len = ftell(fp);
 428             if (instruction->eops->next) {
 429                 base = instruction->eops->next->offset;
 430                 len -= base;
 431                 if (instruction->eops->next->next &&
 432                     len > (size_t)instruction->eops->next->next->offset)
 433                     len = (size_t)instruction->eops->next->next->offset;
 434             }
 435             /*
 436              * Dummy call to list->output to give the offset to the
 437              * listing module.
 438              */
 439             list->output(offset, NULL, OUT_RAWDATA, 0);
 440             list->uplevel(LIST_INCBIN);
 441             while (t--) {
 442                 size_t l;
 443
 444                 fseek(fp, base, SEEK_SET);
 445                 l = len;
 446                 while (l > 0) {
 447                     int32_t m;
 448                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 449                     if (!m) {
 450                         /*
 451                          * This shouldn't happen unless the file
 452                          * actually changes while we are reading
 453                          * it.
 454                          */
 455                         error(ERR_NONFATAL,
 456                               "`incbin': unexpected EOF while"
 457                               " reading file `%s'", fname);
 458                         t = 0;  /* Try to exit cleanly */
 459                         break;
 460                     }
 461                     out(offset, segment, buf, OUT_RAWDATA, m,
 462                         NO_SEG, NO_SEG);
 463                     l -= m;
 464                 }
 465             }
 466             list->downlevel(LIST_INCBIN);
 467             if (instruction->times > 1) {
 468                 /*
 469                  * Dummy call to list->output to give the offset to the
 470                  * listing module.
 471                  */
 472                 list->output(offset, NULL, OUT_RAWDATA, 0);
 473                 list->uplevel(LIST_TIMES);
 474                 list->downlevel(LIST_TIMES);
 475             }
 476             fclose(fp);
 477             return instruction->times * len;
 478         }
 479         return 0;               /* if we're here, there's an error */
 480     }
 481
 482     /* Check to see if we need an address-size prefix */
 483     add_asp(instruction, bits);
 484
 485     size_prob = 0;
 486
 487     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 488         int m = matches(temp, instruction, bits);
 489         if (m == 100 ||
 490             (m == 99 && jmp_match(segment, offset, bits,
 491                                   instruction, temp->code))) {
 492             /* Matches! */
 493             int64_t insn_size = calcsize(segment, offset, bits,
 494                                       instruction, temp->code);
 495             itimes = instruction->times;
 496             if (insn_size < 0)  /* shouldn't be, on pass two */
 497                 error(ERR_PANIC, "errors made it through from pass one");
 498             else
 499                 while (itimes--) {
 500                     for (j = 0; j < MAXPREFIX; j++) {
 501                         uint8_t c = 0;
 502                         switch (instruction->prefixes[j]) {
 503                         case P_WAIT:
 504                             c = 0x9B;
 505                             break;
 506                         case P_LOCK:
 507                             c = 0xF0;
 508                             break;
 509                         case P_REPNE:
 510                         case P_REPNZ:
 511                             c = 0xF2;
 512                             break;
 513                         case P_REPE:
 514                         case P_REPZ:
 515                         case P_REP:
 516                             c = 0xF3;
 517                             break;
 518                         case R_CS:
 519                             if (bits == 64) {
 520                                 error(ERR_WARNING | ERR_PASS2,
 521                                       "cs segment base generated, but will be ignored in 64-bit mode");
 522                             }
 523                             c = 0x2E;
 524                             break;
 525                         case R_DS:
 526                             if (bits == 64) {
 527                                 error(ERR_WARNING | ERR_PASS2,
 528                                       "ds segment base generated, but will be ignored in 64-bit mode");
 529                             }
 530                             c = 0x3E;
 531                             break;
 532                         case R_ES:
 533                            if (bits == 64) {
 534                                 error(ERR_WARNING | ERR_PASS2,
 535                                       "es segment base generated, but will be ignored in 64-bit mode");
 536                            }
 537                             c = 0x26;
 538                             break;
 539                         case R_FS:
 540                             c = 0x64;
 541                             break;
 542                         case R_GS:
 543                             c = 0x65;
 544                             break;
 545                         case R_SS:
 546                             if (bits == 64) {
 547                                 error(ERR_WARNING | ERR_PASS2,
 548                                       "ss segment base generated, but will be ignored in 64-bit mode");
 549                             }
 550                             c = 0x36;
 551                             break;
 552                         case R_SEGR6:
 553                         case R_SEGR7:
 554                             error(ERR_NONFATAL,
 555                                   "segr6 and segr7 cannot be used as prefixes");
 556                             break;
 557                         case P_A16:
 558                             if (bits == 64) {
 559                                 error(ERR_NONFATAL,
 560                                       "16-bit addressing is not supported "
 561                                       "in 64-bit mode");
 562                             } else if (bits != 16)
 563                                 c = 0x67;
 564                             break;
 565                         case P_A32:
 566                             if (bits != 32)
 567                                 c = 0x67;
 568                             break;
 569                         case P_A64:
 570                             if (bits != 64) {
 571                                 error(ERR_NONFATAL,
 572                                       "64-bit addressing is only supported "
 573                                       "in 64-bit mode");
 574                             }
 575                             break;
 576                         case P_ASP:
 577                             c = 0x67;
 578                             break;
 579                         case P_O16:
 580                             if (bits != 16)
 581                                 c = 0x66;
 582                             break;
 583                         case P_O32:
 584                             if (bits == 16)
 585                                 c = 0x66;
 586                             break;
 587                         case P_O64:
 588                             /* REX.W */
 589                             break;
 590                         case P_OSP:
 591                             c = 0x66;
 592                             break;
 593                         case P_none:
 594                             break;
 595                         default:
 596                             error(ERR_PANIC, "invalid instruction prefix");
 597                         }
 598                         if (c != 0) {
 599                             out(offset, segment, &c, OUT_RAWDATA, 1,
 600                                 NO_SEG, NO_SEG);
 601                             offset++;
 602                         }
 603                     }
 604                     insn_end = offset + insn_size;
 605                     gencode(segment, offset, bits, instruction,
 606                             temp, insn_end);
 607                     offset += insn_size;
 608                     if (itimes > 0 && itimes == instruction->times - 1) {
 609                         /*
 610                          * Dummy call to list->output to give the offset to the
 611                          * listing module.
 612                          */
 613                         list->output(offset, NULL, OUT_RAWDATA, 0);
 614                         list->uplevel(LIST_TIMES);
 615                     }
 616                 }
 617             if (instruction->times > 1)
 618                 list->downlevel(LIST_TIMES);
 619             return offset - start;
 620         } else if (m > 0 && m > size_prob) {
 621             size_prob = m;
 622         }
 623     }
 624
 625     if (temp->opcode == -1) {   /* didn't match any instruction */
 626         switch (size_prob) {
 627         case 1:
 628             error(ERR_NONFATAL, "operation size not specified");
 629             break;
 630         case 2:
 631             error(ERR_NONFATAL, "mismatch in operand sizes");
 632             break;
 633         case 3:
 634             error(ERR_NONFATAL, "no instruction for this cpu level");
 635             break;
 636         case 4:
 637             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 638                   bits);
 639             break;
 640         default:
 641             error(ERR_NONFATAL,
 642                   "invalid combination of opcode and operands");
 643             break;
 644         }
 645     }
 646     return 0;
 647 }
 648
 649 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 650                insn * instruction, efunc error)
 651 {
 652     const struct itemplate *temp;
 653
 654     errfunc = error;            /* to pass to other functions */
 655     cpu = cp;
 656
 657     if (instruction->opcode == -1)
 658         return 0;
 659
 660     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 661         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 662         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 663         instruction->opcode == I_DY) {
 664         extop *e;
 665         int32_t isize, osize, wsize = 0;   /* placate gcc */
 666
 667         isize = 0;
 668         switch (instruction->opcode) {
 669         case I_DB:
 670             wsize = 1;
 671             break;
 672         case I_DW:
 673             wsize = 2;
 674             break;
 675         case I_DD:
 676             wsize = 4;
 677             break;
 678         case I_DQ:
 679             wsize = 8;
 680             break;
 681         case I_DT:
 682             wsize = 10;
 683             break;
 684         case I_DO:
 685             wsize = 16;
 686             break;
 687         case I_DY:
 688             wsize = 32;
 689             break;
 690         default:
 691             break;
 692         }
 693
 694         for (e = instruction->eops; e; e = e->next) {
 695             int32_t align;
 696
 697             osize = 0;
 698             if (e->type == EOT_DB_NUMBER)
 699                 osize = 1;
 700             else if (e->type == EOT_DB_STRING ||
 701                      e->type == EOT_DB_STRING_FREE)
 702                 osize = e->stringlen;
 703
 704             align = (-osize) % wsize;
 705             if (align < 0)
 706                 align += wsize;
 707             isize += osize + align;
 708         }
 709         return isize * instruction->times;
 710     }
 711
 712     if (instruction->opcode == I_INCBIN) {
 713         const char *fname = instruction->eops->stringval;
 714         FILE *fp;
 715         size_t len;
 716
 717         fp = fopen(fname, "rb");
 718         if (!fp)
 719             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 720                   fname);
 721         else if (fseek(fp, 0L, SEEK_END) < 0)
 722             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 723                   fname);
 724         else {
 725             len = ftell(fp);
 726             fclose(fp);
 727             if (instruction->eops->next) {
 728                 len -= instruction->eops->next->offset;
 729                 if (instruction->eops->next->next &&
 730                     len > (size_t)instruction->eops->next->next->offset) {
 731                     len = (size_t)instruction->eops->next->next->offset;
 732                 }
 733             }
 734             return instruction->times * len;
 735         }
 736         return 0;               /* if we're here, there's an error */
 737     }
 738
 739     /* Check to see if we need an address-size prefix */
 740     add_asp(instruction, bits);
 741
 742     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 743         int m = matches(temp, instruction, bits);
 744         if (m == 100 ||
 745             (m == 99 && jmp_match(segment, offset, bits,
 746                                   instruction, temp->code))) {
 747             /* we've matched an instruction. */
 748             int64_t isize;
 749             const uint8_t *codes = temp->code;
 750             int j;
 751
 752             isize = calcsize(segment, offset, bits, instruction, codes);
 753             if (isize < 0)
 754                 return -1;
 755             for (j = 0; j < MAXPREFIX; j++) {
 756                 switch (instruction->prefixes[j]) {
 757                 case P_A16:
 758                     if (bits != 16)
 759                         isize++;
 760                     break;
 761                 case P_A32:
 762                     if (bits != 32)
 763                         isize++;
 764                     break;
 765                 case P_O16:
 766                     if (bits != 16)
 767                         isize++;
 768                     break;
 769                 case P_O32:
 770                     if (bits == 16)
 771                         isize++;
 772                     break;
 773                 case P_A64:
 774                 case P_O64:
 775                 case P_none:
 776                     break;
 777                 default:
 778                     isize++;
 779                     break;
 780                 }
 781             }
 782             return isize * instruction->times;
 783         }
 784     }
 785     return -1;                  /* didn't match any instruction */
 786 }
 787
 788 static bool possible_sbyte(operand *o)
 789 {
 790     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 791         !(o->opflags & OPFLAG_UNKNOWN) &&
 792         optimizing >= 0 && !(o->type & STRICT);
 793 }
 794
 795 /* check that opn[op]  is a signed byte of size 16 or 32 */
 796 static bool is_sbyte16(operand *o)
 797 {
 798     int16_t v;
 799
 800     if (!possible_sbyte(o))
 801         return false;
 802
 803     v = o->offset;
 804     return v >= -128 && v <= 127;
 805 }
 806
 807 static bool is_sbyte32(operand *o)
 808 {
 809     int32_t v;
 810
 811     if (!possible_sbyte(o))
 812         return false;
 813
 814     v = o->offset;
 815     return v >= -128 && v <= 127;
 816 }
 817
 818 /* Common construct */
 819 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 820
 821 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 822                         insn * ins, const uint8_t *codes)
 823 {
 824     int64_t length = 0;
 825     uint8_t c;
 826     int rex_mask = ~0;
 827     int op1, op2;
 828     struct operand *opx;
 829     uint8_t opex = 0;
 830
 831     ins->rex = 0;               /* Ensure REX is reset */
 832
 833     if (ins->prefixes[PPS_OSIZE] == P_O64)
 834         ins->rex |= REX_W;
 835
 836     (void)segment;              /* Don't warn that this parameter is unused */
 837     (void)offset;               /* Don't warn that this parameter is unused */
 838
 839     while (*codes) {
 840         c = *codes++;
 841         op1 = (c & 3) + ((opex & 1) << 2);
 842         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 843         opx = &ins->oprs[op1];
 844         opex = 0;               /* For the next iteration */
 845
 846         switch (c) {
 847         case 01:
 848         case 02:
 849         case 03:
 850         case 04:
 851             codes += c, length += c;
 852             break;
 853
 854         case 05:
 855         case 06:
 856         case 07:
 857             opex = c;
 858             break;
 859
 860         case4(010):
 861             ins->rex |=
 862                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 863             codes++, length++;
 864             break;
 865
 866         case4(014):
 867         case4(020):
 868         case4(024):
 869             length++;
 870             break;
 871
 872         case4(030):
 873             length += 2;
 874             break;
 875
 876         case4(034):
 877             if (opx->type & (BITS16 | BITS32 | BITS64))
 878                 length += (opx->type & BITS16) ? 2 : 4;
 879             else
 880                 length += (bits == 16) ? 2 : 4;
 881             break;
 882
 883         case4(040):
 884             length += 4;
 885             break;
 886
 887         case4(044):
 888             length += ins->addr_size >> 3;
 889             break;
 890
 891         case4(050):
 892             length++;
 893             break;
 894
 895         case4(054):
 896             length += 8; /* MOV reg64/imm */
 897             break;
 898
 899         case4(060):
 900             length += 2;
 901             break;
 902
 903         case4(064):
 904             if (opx->type & (BITS16 | BITS32 | BITS64))
 905                 length += (opx->type & BITS16) ? 2 : 4;
 906             else
 907                 length += (bits == 16) ? 2 : 4;
 908             break;
 909
 910         case4(070):
 911             length += 4;
 912             break;
 913
 914         case4(074):
 915             length += 2;
 916             break;
 917
 918         case4(0140):
 919             length += is_sbyte16(opx) ? 1 : 2;
 920             break;
 921
 922         case4(0144):
 923             codes++;
 924             length++;
 925             break;
 926
 927         case4(0150):
 928             length += is_sbyte32(opx) ? 1 : 4;
 929             break;
 930
 931         case4(0154):
 932             codes++;
 933             length++;
 934             break;
 935
 936         case4(0160):
 937             length++;
 938             ins->rex |= REX_D;
 939             ins->drexdst = regval(opx);
 940             break;
 941
 942         case4(0164):
 943             length++;
 944             ins->rex |= REX_D|REX_OC;
 945             ins->drexdst = regval(opx);
 946             break;
 947
 948         case 0171:
 949             break;
 950
 951         case 0172:
 952         case 0173:
 953         case 0174:
 954             codes++;
 955             length++;
 956             break;
 957
 958         case4(0250):
 959             length += is_sbyte32(opx) ? 1 : 4;
 960             break;
 961
 962         case4(0254):
 963             length += 4;
 964             break;
 965
 966         case4(0260):
 967             ins->rex |= REX_V;
 968             ins->drexdst = regval(opx);
 969             ins->vex_cm = *codes++;
 970             ins->vex_wlp = *codes++;
 971             break;
 972
 973         case 0270:
 974             ins->rex |= REX_V;
 975             ins->drexdst = 0;
 976             ins->vex_cm = *codes++;
 977             ins->vex_wlp = *codes++;
 978             break;
 979
 980         case4(0274):
 981             length++;
 982             break;
 983
 984         case4(0300):
 985             break;
 986
 987         case 0310:
 988             if (bits == 64)
 989                 return -1;
 990             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 991             break;
 992
 993         case 0311:
 994             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 995             break;
 996
 997         case 0312:
 998             break;
 999
1000         case 0313:
1001             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
1002                 has_prefix(ins, PPS_ASIZE, P_A32))
1003                 return -1;
1004             break;
1005
1006         case4(0314):
1007             break;
1008
1009         case 0320:
1010             length += (bits != 16);
1011             break;
1012
1013         case 0321:
1014             length += (bits == 16);
1015             break;
1016
1017         case 0322:
1018             break;
1019
1020         case 0323:
1021             rex_mask &= ~REX_W;
1022             break;
1023
1024         case 0324:
1025             ins->rex |= REX_W;
1026             break;
1027
1028         case 0325:
1029             ins->rex |= REX_NH;
1030             break;
1031
1032         case 0330:
1033             codes++, length++;
1034             break;
1035
1036         case 0331:
1037             break;
1038
1039         case 0332:
1040         case 0333:
1041             length++;
1042             break;
1043
1044         case 0334:
1045             ins->rex |= REX_L;
1046             break;
1047
1048         case 0335:
1049             break;
1050
1051         case 0336:
1052             if (!ins->prefixes[PPS_LREP])
1053                 ins->prefixes[PPS_LREP] = P_REP;
1054             break;
1055
1056         case 0337:
1057             if (!ins->prefixes[PPS_LREP])
1058                 ins->prefixes[PPS_LREP] = P_REPNE;
1059             break;
1060
1061         case 0340:
1062             if (ins->oprs[0].segment != NO_SEG)
1063                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1064                         " quantity of BSS space");
1065             else
1066                 length += ins->oprs[0].offset;
1067             break;
1068
1069         case 0341:
1070             if (!ins->prefixes[PPS_WAIT])
1071                 ins->prefixes[PPS_WAIT] = P_WAIT;
1072             break;
1073
1074         case4(0344):
1075             length++;
1076             break;
1077
1078         case 0360:
1079             break;
1080
1081         case 0361:
1082         case 0362:
1083         case 0363:
1084             length++;
1085             break;
1086
1087         case 0364:
1088         case 0365:
1089             break;
1090
1091         case 0366:
1092         case 0367:
1093             length++;
1094             break;
1095
1096         case 0370:
1097         case 0371:
1098         case 0372:
1099             break;
1100
1101         case 0373:
1102             length++;
1103             break;
1104
1105         case4(0100):
1106         case4(0110):
1107         case4(0120):
1108         case4(0130):
1109         case4(0200):
1110         case4(0204):
1111         case4(0210):
1112         case4(0214):
1113         case4(0220):
1114         case4(0224):
1115         case4(0230):
1116         case4(0234):
1117             {
1118                 ea ea_data;
1119                 int rfield;
1120                 int32_t rflags;
1121                 struct operand *opy = &ins->oprs[op2];
1122
1123                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1124
1125                 if (c <= 0177) {
1126                     /* pick rfield from operand b (opx) */
1127                     rflags = regflag(opx);
1128                     rfield = nasm_regvals[opx->basereg];
1129                 } else {
1130                     rflags = 0;
1131                     rfield = c & 7;
1132                 }
1133                 if (!process_ea(opy, &ea_data, bits,
1134                                 ins->addr_size, rfield, rflags)) {
1135                     errfunc(ERR_NONFATAL, "invalid effective address");
1136                     return -1;
1137                 } else {
1138                     ins->rex |= ea_data.rex;
1139                     length += ea_data.size;
1140                 }
1141             }
1142             break;
1143
1144         default:
1145             errfunc(ERR_PANIC, "internal instruction table corrupt"
1146                     ": instruction code \\%o (0x%02X) given", c, c);
1147             break;
1148         }
1149     }
1150
1151     ins->rex &= rex_mask;
1152
1153     if (ins->rex & REX_NH) {
1154         if (ins->rex & REX_H) {
1155             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1156             return -1;
1157         }
1158         ins->rex &= ~REX_P;     /* Don't force REX prefix due to high reg */
1159     }
1160
1161     if (ins->rex & REX_V) {
1162         int bad32 = REX_R|REX_W|REX_X|REX_B;
1163
1164         if (ins->rex & REX_H) {
1165             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1166             return -1;
1167         }
1168         switch (ins->vex_wlp & 030) {
1169         case 000:
1170         case 020:
1171             ins->rex &= ~REX_W;
1172             break;
1173         case 010:
1174             ins->rex |= REX_W;
1175             bad32 &= ~REX_W;
1176             break;
1177         case 030:
1178             /* Follow REX_W */
1179             break;
1180         }
1181
1182         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1183             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1184             return -1;
1185         }
1186         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1187             length += 3;
1188         else
1189             length += 2;
1190     } else if (ins->rex & REX_D) {
1191         if (ins->rex & REX_H) {
1192             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1193             return -1;
1194         }
1195         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1196                            ins->drexdst > 7)) {
1197             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1198             return -1;
1199         }
1200         length++;
1201     } else if (ins->rex & REX_REAL) {
1202         if (ins->rex & REX_H) {
1203             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1204             return -1;
1205         } else if (bits == 64) {
1206             length++;
1207         } else if ((ins->rex & REX_L) &&
1208                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1209                    cpu >= IF_X86_64) {
1210             /* LOCK-as-REX.R */
1211             assert_no_prefix(ins, PPS_LREP);
1212             length++;
1213         } else {
1214             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1215             return -1;
1216         }
1217     }
1218
1219     return length;
1220 }
1221
1222 #define EMIT_REX()                                                      \
1223     if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) && (bits == 64)) { \
1224         ins->rex = (ins->rex & REX_REAL)|REX_P;                         \
1225         out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
1226         ins->rex = 0;                                                   \
1227         offset += 1; \
1228     }
1229
1230 static void gencode(int32_t segment, int64_t offset, int bits,
1231                     insn * ins, const struct itemplate *temp,
1232                     int64_t insn_end)
1233 {
1234     static char condval[] = {   /* conditional opcodes */
1235         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1236         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1237         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1238     };
1239     uint8_t c;
1240     uint8_t bytes[4];
1241     int64_t size;
1242     int64_t data;
1243     int op1, op2;
1244     struct operand *opx;
1245     const uint8_t *codes = temp->code;
1246     uint8_t opex = 0;
1247
1248     while (*codes) {
1249         c = *codes++;
1250         op1 = (c & 3) + ((opex & 1) << 2);
1251         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1252         opx = &ins->oprs[op1];
1253         opex = 0;               /* For the next iteration */
1254
1255         switch (c) {
1256         case 01:
1257         case 02:
1258         case 03:
1259         case 04:
1260             EMIT_REX();
1261             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1262             codes += c;
1263             offset += c;
1264             break;
1265
1266         case 05:
1267         case 06:
1268         case 07:
1269             opex = c;
1270             break;
1271
1272         case4(010):
1273             EMIT_REX();
1274             bytes[0] = *codes++ + (regval(opx) & 7);
1275             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1276             offset += 1;
1277             break;
1278
1279         case4(014):
1280             /* The test for BITS8 and SBYTE here is intended to avoid
1281                warning on optimizer actions due to SBYTE, while still
1282                warn on explicit BYTE directives.  Also warn, obviously,
1283                if the optimizer isn't enabled. */
1284             if (((opx->type & BITS8) ||
1285                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1286                 (opx->offset < -128 || opx->offset > 127)) {
1287                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1288                         "signed byte value exceeds bounds");
1289             }
1290             if (opx->segment != NO_SEG) {
1291                 data = opx->offset;
1292                 out(offset, segment, &data, OUT_ADDRESS, 1,
1293                     opx->segment, opx->wrt);
1294             } else {
1295                 bytes[0] = opx->offset;
1296                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1297                     NO_SEG);
1298             }
1299             offset += 1;
1300             break;
1301
1302         case4(020):
1303             if (opx->offset < -256 || opx->offset > 255) {
1304                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1305                         "byte value exceeds bounds");
1306             }
1307             if (opx->segment != NO_SEG) {
1308                 data = opx->offset;
1309                 out(offset, segment, &data, OUT_ADDRESS, 1,
1310                     opx->segment, opx->wrt);
1311             } else {
1312                 bytes[0] = opx->offset;
1313                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1314                     NO_SEG);
1315             }
1316             offset += 1;
1317             break;
1318
1319         case4(024):
1320             if (opx->offset < 0 || opx->offset > 255)
1321                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1322                         "unsigned byte value exceeds bounds");
1323             if (opx->segment != NO_SEG) {
1324                 data = opx->offset;
1325                 out(offset, segment, &data, OUT_ADDRESS, 1,
1326                     opx->segment, opx->wrt);
1327             } else {
1328                 bytes[0] = opx->offset;
1329                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1330                     NO_SEG);
1331             }
1332             offset += 1;
1333             break;
1334
1335         case4(030):
1336             warn_overflow(2, opx);
1337             data = opx->offset;
1338             out(offset, segment, &data, OUT_ADDRESS, 2,
1339                 opx->segment, opx->wrt);
1340             offset += 2;
1341             break;
1342
1343         case4(034):
1344             if (opx->type & (BITS16 | BITS32))
1345                 size = (opx->type & BITS16) ? 2 : 4;
1346             else
1347                 size = (bits == 16) ? 2 : 4;
1348             warn_overflow(size, opx);
1349             data = opx->offset;
1350             out(offset, segment, &data, OUT_ADDRESS, size,
1351                 opx->segment, opx->wrt);
1352             offset += size;
1353             break;
1354
1355         case4(040):
1356             warn_overflow(4, opx);
1357             data = opx->offset;
1358             out(offset, segment, &data, OUT_ADDRESS, 4,
1359                 opx->segment, opx->wrt);
1360             offset += 4;
1361             break;
1362
1363         case4(044):
1364             data = opx->offset;
1365             size = ins->addr_size >> 3;
1366             warn_overflow(size, opx);
1367             out(offset, segment, &data, OUT_ADDRESS, size,
1368                 opx->segment, opx->wrt);
1369             offset += size;
1370             break;
1371
1372         case4(050):
1373             if (opx->segment != segment)
1374                 errfunc(ERR_NONFATAL,
1375                         "short relative jump outside segment");
1376             data = opx->offset - insn_end;
1377             if (data > 127 || data < -128)
1378                 errfunc(ERR_NONFATAL, "short jump is out of range");
1379             bytes[0] = data;
1380             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1381             offset += 1;
1382             break;
1383
1384         case4(054):
1385             data = (int64_t)opx->offset;
1386             out(offset, segment, &data, OUT_ADDRESS, 8,
1387                 opx->segment, opx->wrt);
1388             offset += 8;
1389             break;
1390
1391         case4(060):
1392             if (opx->segment != segment) {
1393                 data = opx->offset;
1394                 out(offset, segment, &data,
1395                     OUT_REL2ADR, insn_end - offset,
1396                     opx->segment, opx->wrt);
1397             } else {
1398                 data = opx->offset - insn_end;
1399                 out(offset, segment, &data,
1400                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1401             }
1402             offset += 2;
1403             break;
1404
1405         case4(064):
1406             if (opx->type & (BITS16 | BITS32 | BITS64))
1407                 size = (opx->type & BITS16) ? 2 : 4;
1408             else
1409                 size = (bits == 16) ? 2 : 4;
1410             if (opx->segment != segment) {
1411                 data = opx->offset;
1412                 out(offset, segment, &data,
1413                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1414                     insn_end - offset, opx->segment, opx->wrt);
1415             } else {
1416                 data = opx->offset - insn_end;
1417                 out(offset, segment, &data,
1418                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1419             }
1420             offset += size;
1421             break;
1422
1423         case4(070):
1424             if (opx->segment != segment) {
1425                 data = opx->offset;
1426                 out(offset, segment, &data,
1427                     OUT_REL4ADR, insn_end - offset,
1428                     opx->segment, opx->wrt);
1429             } else {
1430                 data = opx->offset - insn_end;
1431                 out(offset, segment, &data,
1432                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1433             }
1434             offset += 4;
1435             break;
1436
1437         case4(074):
1438             if (opx->segment == NO_SEG)
1439                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1440                         " relocatable");
1441             data = 0;
1442             out(offset, segment, &data, OUT_ADDRESS, 2,
1443                 outfmt->segbase(1 + opx->segment),
1444                 opx->wrt);
1445             offset += 2;
1446             break;
1447
1448         case4(0140):
1449             data = opx->offset;
1450             warn_overflow(2, opx);
1451             if (is_sbyte16(opx)) {
1452                 bytes[0] = data;
1453                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1454                     NO_SEG);
1455                 offset++;
1456             } else {
1457                 out(offset, segment, &data, OUT_ADDRESS, 2,
1458                     opx->segment, opx->wrt);
1459                 offset += 2;
1460             }
1461             break;
1462
1463         case4(0144):
1464             EMIT_REX();
1465             bytes[0] = *codes++;
1466             if (is_sbyte16(opx))
1467                 bytes[0] |= 2;  /* s-bit */
1468             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1469             offset++;
1470             break;
1471
1472         case4(0150):
1473             data = opx->offset;
1474             warn_overflow(4, opx);
1475             if (is_sbyte32(opx)) {
1476                 bytes[0] = data;
1477                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1478                     NO_SEG);
1479                 offset++;
1480             } else {
1481                 out(offset, segment, &data, OUT_ADDRESS, 4,
1482                     opx->segment, opx->wrt);
1483                 offset += 4;
1484             }
1485             break;
1486
1487         case4(0154):
1488             EMIT_REX();
1489             bytes[0] = *codes++;
1490             if (is_sbyte32(opx))
1491                 bytes[0] |= 2;  /* s-bit */
1492             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1493             offset++;
1494             break;
1495
1496         case4(0160):
1497         case4(0164):
1498             break;
1499
1500         case 0171:
1501             bytes[0] =
1502                 (ins->drexdst << 4) |
1503                 (ins->rex & REX_OC ? 0x08 : 0) |
1504                 (ins->rex & (REX_R|REX_X|REX_B));
1505             ins->rex = 0;
1506             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1507             offset++;
1508             break;
1509
1510         case 0172:
1511             c = *codes++;
1512             opx = &ins->oprs[c >> 3];
1513             bytes[0] = nasm_regvals[opx->basereg] << 4;
1514             opx = &ins->oprs[c & 7];
1515             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1516                 errfunc(ERR_NONFATAL,
1517                         "non-absolute expression not permitted as argument %d",
1518                         c & 7);
1519             } else {
1520                 if (opx->offset & ~15) {
1521                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1522                             "four-bit argument exceeds bounds");
1523                 }
1524                 bytes[0] |= opx->offset & 15;
1525             }
1526             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1527             offset++;
1528             break;
1529
1530         case 0173:
1531             c = *codes++;
1532             opx = &ins->oprs[c >> 4];
1533             bytes[0] = nasm_regvals[opx->basereg] << 4;
1534             bytes[0] |= c & 15;
1535             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1536             offset++;
1537             break;
1538
1539         case 0174:
1540             c = *codes++;
1541             opx = &ins->oprs[c];
1542             bytes[0] = nasm_regvals[opx->basereg] << 4;
1543             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1544             offset++;
1545             break;
1546
1547         case4(0250):
1548             data = opx->offset;
1549             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1550                 (int32_t)data != (int64_t)data) {
1551                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1552                         "signed dword immediate exceeds bounds");
1553             }
1554             if (is_sbyte32(opx)) {
1555                 bytes[0] = data;
1556                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1557                     NO_SEG);
1558                 offset++;
1559             } else {
1560                 out(offset, segment, &data, OUT_ADDRESS, 4,
1561                     opx->segment, opx->wrt);
1562                 offset += 4;
1563             }
1564             break;
1565
1566         case4(0254):
1567             data = opx->offset;
1568             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1569                 (int32_t)data != (int64_t)data) {
1570                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1571                         "signed dword immediate exceeds bounds");
1572             }
1573             out(offset, segment, &data, OUT_ADDRESS, 4,
1574                 opx->segment, opx->wrt);
1575             offset += 4;
1576             break;
1577
1578         case4(0260):
1579         case 0270:
1580             codes += 2;
1581             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1582                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1583                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1584                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1585                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1586                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1587                 offset += 3;
1588             } else {
1589                 bytes[0] = 0xc5;
1590                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1591                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1592                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1593                 offset += 2;
1594             }
1595             break;
1596
1597         case4(0274):
1598         {
1599             uint64_t uv, um;
1600             int s;
1601
1602             if (ins->rex & REX_W)
1603                 s = 64;
1604             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1605                 s = 16;
1606             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1607                 s = 32;
1608             else
1609                 s = bits;
1610
1611             um = (uint64_t)2 << (s-1);
1612             uv = opx->offset;
1613
1614             if (uv > 127 && uv < (uint64_t)-128 &&
1615                 (uv < um-128 || uv > um-1)) {
1616                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1617                         "signed byte value exceeds bounds");
1618             }
1619             if (opx->segment != NO_SEG) {
1620                 data = uv;
1621                 out(offset, segment, &data, OUT_ADDRESS, 1,
1622                     opx->segment, opx->wrt);
1623             } else {
1624                 bytes[0] = uv;
1625                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1626                     NO_SEG);
1627             }
1628             offset += 1;
1629             break;
1630         }
1631
1632         case4(0300):
1633             break;
1634
1635         case 0310:
1636             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1637                 *bytes = 0x67;
1638                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1639                 offset += 1;
1640             } else
1641                 offset += 0;
1642             break;
1643
1644         case 0311:
1645             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1646                 *bytes = 0x67;
1647                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1648                 offset += 1;
1649             } else
1650                 offset += 0;
1651             break;
1652
1653         case 0312:
1654             break;
1655
1656         case 0313:
1657             ins->rex = 0;
1658             break;
1659
1660         case4(0314):
1661             break;
1662
1663         case 0320:
1664             if (bits != 16) {
1665                 *bytes = 0x66;
1666                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1667                 offset += 1;
1668             } else
1669                 offset += 0;
1670             break;
1671
1672         case 0321:
1673             if (bits == 16) {
1674                 *bytes = 0x66;
1675                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1676                 offset += 1;
1677             } else
1678                 offset += 0;
1679             break;
1680
1681         case 0322:
1682         case 0323:
1683             break;
1684
1685         case 0324:
1686             ins->rex |= REX_W;
1687             break;
1688
1689         case 0325:
1690             break;
1691
1692         case 0330:
1693             *bytes = *codes++ ^ condval[ins->condition];
1694             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1695             offset += 1;
1696             break;
1697
1698         case 0331:
1699             break;
1700
1701         case 0332:
1702         case 0333:
1703             *bytes = c - 0332 + 0xF2;
1704             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1705             offset += 1;
1706             break;
1707
1708         case 0334:
1709             if (ins->rex & REX_R) {
1710                 *bytes = 0xF0;
1711                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1712                 offset += 1;
1713             }
1714             ins->rex &= ~(REX_L|REX_R);
1715             break;
1716
1717         case 0335:
1718             break;
1719
1720         case 0336:
1721         case 0337:
1722             break;
1723
1724         case 0340:
1725             if (ins->oprs[0].segment != NO_SEG)
1726                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1727             else {
1728                 int64_t size = ins->oprs[0].offset;
1729                 if (size > 0)
1730                     out(offset, segment, NULL,
1731                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1732                 offset += size;
1733             }
1734             break;
1735
1736         case 0341:
1737             break;
1738
1739         case 0344:
1740         case 0345:
1741             bytes[0] = c & 1;
1742             switch (ins->oprs[0].basereg) {
1743             case R_CS:
1744                 bytes[0] += 0x0E;
1745                 break;
1746             case R_DS:
1747                 bytes[0] += 0x1E;
1748                 break;
1749             case R_ES:
1750                 bytes[0] += 0x06;
1751                 break;
1752             case R_SS:
1753                 bytes[0] += 0x16;
1754                 break;
1755             default:
1756                 errfunc(ERR_PANIC,
1757                         "bizarre 8086 segment register received");
1758             }
1759             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1760             offset++;
1761             break;
1762
1763         case 0346:
1764         case 0347:
1765             bytes[0] = c & 1;
1766             switch (ins->oprs[0].basereg) {
1767             case R_FS:
1768                 bytes[0] += 0xA0;
1769                 break;
1770             case R_GS:
1771                 bytes[0] += 0xA8;
1772                 break;
1773             default:
1774                 errfunc(ERR_PANIC,
1775                         "bizarre 386 segment register received");
1776             }
1777             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1778             offset++;
1779             break;
1780
1781         case 0360:
1782             break;
1783
1784         case 0361:
1785             bytes[0] = 0x66;
1786             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1787             offset += 1;
1788             break;
1789
1790         case 0362:
1791         case 0363:
1792             bytes[0] = c - 0362 + 0xf2;
1793             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1794             offset += 1;
1795             break;
1796
1797         case 0364:
1798         case 0365:
1799             break;
1800
1801         case 0366:
1802         case 0367:
1803             *bytes = c - 0366 + 0x66;
1804             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1805             offset += 1;
1806             break;
1807
1808         case 0370:
1809         case 0371:
1810         case 0372:
1811             break;
1812
1813         case 0373:
1814             *bytes = bits == 16 ? 3 : 5;
1815             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1816             offset += 1;
1817             break;
1818
1819         case4(0100):
1820         case4(0110):
1821         case4(0120):
1822         case4(0130):
1823         case4(0200):
1824         case4(0204):
1825         case4(0210):
1826         case4(0214):
1827         case4(0220):
1828         case4(0224):
1829         case4(0230):
1830         case4(0234):
1831             {
1832                 ea ea_data;
1833                 int rfield;
1834                 int32_t rflags;
1835                 uint8_t *p;
1836                 int32_t s;
1837                 enum out_type type;
1838                 struct operand *opy = &ins->oprs[op2];
1839
1840                 if (c <= 0177) {
1841                     /* pick rfield from operand b (opx) */
1842                     rflags = regflag(opx);
1843                     rfield = nasm_regvals[opx->basereg];
1844                 } else {
1845                     /* rfield is constant */
1846                     rflags = 0;
1847                     rfield = c & 7;
1848                 }
1849
1850                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1851                                 rfield, rflags)) {
1852                     errfunc(ERR_NONFATAL, "invalid effective address");
1853                 }
1854
1855
1856                 p = bytes;
1857                 *p++ = ea_data.modrm;
1858                 if (ea_data.sib_present)
1859                     *p++ = ea_data.sib;
1860
1861                 /* DREX suffixes come between the SIB and the displacement */
1862                 if (ins->rex & REX_D) {
1863                     *p++ = (ins->drexdst << 4) |
1864                            (ins->rex & REX_OC ? 0x08 : 0) |
1865                            (ins->rex & (REX_R|REX_X|REX_B));
1866                     ins->rex = 0;
1867                 }
1868
1869                 s = p - bytes;
1870                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1871
1872                 /*
1873                  * Make sure the address gets the right offset in case
1874                  * the line breaks in the .lst file (BR 1197827)
1875                  */
1876                 offset += s;
1877                 s = 0;
1878
1879                 switch (ea_data.bytes) {
1880                 case 0:
1881                     break;
1882                 case 1:
1883                 case 2:
1884                 case 4:
1885                 case 8:
1886                     data = opy->offset;
1887                     warn_overflow(ea_data.bytes, opy);
1888                     s += ea_data.bytes;
1889                     if (ea_data.rip) {
1890                         if (opy->segment == segment) {
1891                             data -= insn_end;
1892                             out(offset, segment, &data, OUT_ADDRESS,
1893                                 ea_data.bytes, NO_SEG, NO_SEG);
1894                         } else {
1895                             out(offset, segment, &data, OUT_REL4ADR,
1896                                 insn_end - offset, opy->segment, opy->wrt);
1897                         }
1898                     } else {
1899                         type = OUT_ADDRESS;
1900                         out(offset, segment, &data, OUT_ADDRESS,
1901                             ea_data.bytes, opy->segment, opy->wrt);
1902                     }
1903                     break;
1904                 default:
1905                     /* Impossible! */
1906                     errfunc(ERR_PANIC,
1907                             "Invalid amount of bytes (%d) for offset?!",
1908                             ea_data.bytes);
1909                     break;
1910                 }
1911                 offset += s;
1912             }
1913             break;
1914
1915         default:
1916             errfunc(ERR_PANIC, "internal instruction table corrupt"
1917                     ": instruction code \\%o (0x%02X) given", c, c);
1918             break;
1919         }
1920     }
1921 }
1922
1923 static int32_t regflag(const operand * o)
1924 {
1925     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1926         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1927     }
1928     return nasm_reg_flags[o->basereg];
1929 }
1930
1931 static int32_t regval(const operand * o)
1932 {
1933     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1934         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1935     }
1936     return nasm_regvals[o->basereg];
1937 }
1938
1939 static int op_rexflags(const operand * o, int mask)
1940 {
1941     int32_t flags;
1942     int val;
1943
1944     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1945         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1946     }
1947
1948     flags = nasm_reg_flags[o->basereg];
1949     val = nasm_regvals[o->basereg];
1950
1951     return rexflags(val, flags, mask);
1952 }
1953
1954 static int rexflags(int val, int32_t flags, int mask)
1955 {
1956     int rex = 0;
1957
1958     if (val >= 8)
1959         rex |= REX_B|REX_X|REX_R;
1960     if (flags & BITS64)
1961         rex |= REX_W;
1962     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1963         rex |= REX_H;
1964     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1965         rex |= REX_P;
1966
1967     return rex & mask;
1968 }
1969
1970 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1971 {
1972     int i, size[MAX_OPERANDS], asize, oprs, ret;
1973
1974     ret = 100;
1975
1976     /*
1977      * Check the opcode
1978      */
1979     if (itemp->opcode != instruction->opcode)
1980         return 0;
1981
1982     /*
1983      * Count the operands
1984      */
1985     if (itemp->operands != instruction->operands)
1986         return 0;
1987
1988     /*
1989      * Check that no spurious colons or TOs are present
1990      */
1991     for (i = 0; i < itemp->operands; i++)
1992         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1993             return 0;
1994
1995     /*
1996      * Process size flags
1997      */
1998     if (itemp->flags & IF_ARMASK) {
1999         memset(size, 0, sizeof size);
2000
2001         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2002
2003         switch (itemp->flags & IF_SMASK) {
2004         case IF_SB:
2005             size[i] = BITS8;
2006             break;
2007         case IF_SW:
2008             size[i] = BITS16;
2009             break;
2010         case IF_SD:
2011             size[i] = BITS32;
2012             break;
2013         case IF_SQ:
2014             size[i] = BITS64;
2015             break;
2016         case IF_SO:
2017             size[i] = BITS128;
2018             break;
2019         case IF_SY:
2020             size[i] = BITS256;
2021             break;
2022         case IF_SZ:
2023             switch (bits) {
2024             case 16:
2025                 size[i] = BITS16;
2026                 break;
2027             case 32:
2028                 size[i] = BITS32;
2029                 break;
2030             case 64:
2031                 size[i] = BITS64;
2032                 break;
2033             }
2034             break;
2035         default:
2036             break;
2037         }
2038     } else {
2039         asize = 0;
2040         switch (itemp->flags & IF_SMASK) {
2041         case IF_SB:
2042             asize = BITS8;
2043             break;
2044         case IF_SW:
2045             asize = BITS16;
2046             break;
2047         case IF_SD:
2048             asize = BITS32;
2049             break;
2050         case IF_SQ:
2051             asize = BITS64;
2052             break;
2053         case IF_SO:
2054             asize = BITS128;
2055             break;
2056         case IF_SY:
2057             asize = BITS256;
2058             break;
2059         case IF_SZ:
2060             switch (bits) {
2061             case 16:
2062                 asize = BITS16;
2063                 break;
2064             case 32:
2065                 asize = BITS32;
2066                 break;
2067             case 64:
2068                 asize = BITS64;
2069                 break;
2070             }
2071             break;
2072         default:
2073             break;
2074         }
2075         for (i = 0; i < MAX_OPERANDS; i++)
2076             size[i] = asize;
2077     }
2078
2079     /*
2080      * Check that the operand flags all match up
2081      */
2082     for (i = 0; i < itemp->operands; i++) {
2083         int32_t type = instruction->oprs[i].type;
2084         if (!(type & SIZE_MASK))
2085             type |= size[i];
2086
2087         if (itemp->opd[i] & SAME_AS) {
2088             int j = itemp->opd[i] & ~SAME_AS;
2089             if (type != instruction->oprs[j].type ||
2090                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2091                 return 0;
2092         } else if (itemp->opd[i] & ~type ||
2093             ((itemp->opd[i] & SIZE_MASK) &&
2094              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2095             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2096                 (type & SIZE_MASK))
2097                 return 0;
2098             else
2099                 return 1;
2100         }
2101     }
2102
2103     /*
2104      * Check operand sizes
2105      */
2106     if (itemp->flags & (IF_SM | IF_SM2)) {
2107         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2108         asize = 0;
2109         for (i = 0; i < oprs; i++) {
2110             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2111                 int j;
2112                 for (j = 0; j < oprs; j++)
2113                     size[j] = asize;
2114                 break;
2115             }
2116         }
2117     } else {
2118         oprs = itemp->operands;
2119     }
2120
2121     for (i = 0; i < itemp->operands; i++) {
2122         if (!(itemp->opd[i] & SIZE_MASK) &&
2123             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2124             return 2;
2125     }
2126
2127     /*
2128      * Check template is okay at the set cpu level
2129      */
2130     if (((itemp->flags & IF_PLEVEL) > cpu))
2131         return 3;
2132
2133     /*
2134      * Verify the appropriate long mode flag.
2135      */
2136     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2137         return 4;
2138
2139     /*
2140      * Check if special handling needed for Jumps
2141      */
2142     if ((uint8_t)(itemp->code[0]) >= 0370)
2143         return 99;
2144
2145     return ret;
2146 }
2147
2148 static ea *process_ea(operand * input, ea * output, int bits,
2149                       int addrbits, int rfield, int32_t rflags)
2150 {
2151     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2152
2153     output->rip = false;
2154
2155     /* REX flags for the rfield operand */
2156     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2157
2158     if (!(REGISTER & ~input->type)) {   /* register direct */
2159         int i;
2160         int32_t f;
2161
2162         if (input->basereg < EXPR_REG_START /* Verify as Register */
2163             || input->basereg >= REG_ENUM_LIMIT)
2164             return NULL;
2165         f = regflag(input);
2166         i = nasm_regvals[input->basereg];
2167
2168         if (REG_EA & ~f)
2169             return NULL;        /* Invalid EA register */
2170
2171         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2172
2173         output->sib_present = false;             /* no SIB necessary */
2174         output->bytes = 0;  /* no offset necessary either */
2175         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2176     } else {                    /* it's a memory reference */
2177         if (input->basereg == -1
2178             && (input->indexreg == -1 || input->scale == 0)) {
2179             /* it's a pure offset */
2180             if (bits == 64 && (~input->type & IP_REL)) {
2181               int scale, index, base;
2182               output->sib_present = true;
2183               scale = 0;
2184               index = 4;
2185               base = 5;
2186               output->sib = (scale << 6) | (index << 3) | base;
2187               output->bytes = 4;
2188               output->modrm = 4 | ((rfield & 7) << 3);
2189               output->rip = false;
2190             } else {
2191               output->sib_present = false;
2192               output->bytes = (addrbits != 16 ? 4 : 2);
2193               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2194               output->rip = bits == 64;
2195             }
2196         } else {                /* it's an indirection */
2197             int i = input->indexreg, b = input->basereg, s = input->scale;
2198             int32_t o = input->offset, seg = input->segment;
2199             int hb = input->hintbase, ht = input->hinttype;
2200             int t;
2201             int it, bt;
2202             int32_t ix, bx;     /* register flags */
2203
2204             if (s == 0)
2205                 i = -1;         /* make this easy, at least */
2206
2207             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2208                 it = nasm_regvals[i];
2209                 ix = nasm_reg_flags[i];
2210             } else {
2211                 it = -1;
2212                 ix = 0;
2213             }
2214
2215             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2216                 bt = nasm_regvals[b];
2217                 bx = nasm_reg_flags[b];
2218             } else {
2219                 bt = -1;
2220                 bx = 0;
2221             }
2222
2223             /* check for a 32/64-bit memory reference... */
2224             if ((ix|bx) & (BITS32|BITS64)) {
2225                 /* it must be a 32/64-bit memory reference. Firstly we have
2226                  * to check that all registers involved are type E/Rxx. */
2227                 int32_t sok = BITS32|BITS64;
2228
2229                 if (it != -1) {
2230                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2231                         sok &= ix;
2232                     else
2233                         return NULL;
2234                 }
2235
2236                 if (bt != -1) {
2237                     if (REG_GPR & ~bx)
2238                         return NULL; /* Invalid register */
2239                     if (~sok & bx & SIZE_MASK)
2240                         return NULL; /* Invalid size */
2241                     sok &= bx;
2242                 }
2243
2244                 /* While we're here, ensure the user didn't specify
2245                    WORD or QWORD. */
2246                 if (input->disp_size == 16 || input->disp_size == 64)
2247                     return NULL;
2248
2249                 if (addrbits == 16 ||
2250                     (addrbits == 32 && !(sok & BITS32)) ||
2251                     (addrbits == 64 && !(sok & BITS64)))
2252                     return NULL;
2253
2254                 /* now reorganize base/index */
2255                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2256                     ((hb == b && ht == EAH_NOTBASE)
2257                      || (hb == i && ht == EAH_MAKEBASE))) {
2258                     /* swap if hints say so */
2259                     t = bt, bt = it, it = t;
2260                     t = bx, bx = ix, ix = t;
2261                 }
2262                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2263                     bt = -1, bx = 0, s++;
2264                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2265                     /* make single reg base, unless hint */
2266                     bt = it, bx = ix, it = -1, ix = 0;
2267                 }
2268                 if (((s == 2 && it != REG_NUM_ESP
2269                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2270                      || s == 5 || s == 9) && bt == -1)
2271                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2272                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2273                     && (input->eaflags & EAF_TIMESTWO))
2274                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2275                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2276                 if (s == 1 && it == REG_NUM_ESP) {
2277                     /* swap ESP into base if scale is 1 */
2278                     t = it, it = bt, bt = t;
2279                     t = ix, ix = bx, bx = t;
2280                 }
2281                 if (it == REG_NUM_ESP
2282                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2283                     return NULL;        /* wrong, for various reasons */
2284
2285                 output->rex |= rexflags(it, ix, REX_X);
2286                 output->rex |= rexflags(bt, bx, REX_B);
2287
2288                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2289                     /* no SIB needed */
2290                     int mod, rm;
2291
2292                     if (bt == -1) {
2293                         rm = 5;
2294                         mod = 0;
2295                     } else {
2296                         rm = (bt & 7);
2297                         if (rm != REG_NUM_EBP && o == 0 &&
2298                                 seg == NO_SEG && !forw_ref &&
2299                                 !(input->eaflags &
2300                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2301                             mod = 0;
2302                         else if (input->eaflags & EAF_BYTEOFFS ||
2303                                  (o >= -128 && o <= 127 && seg == NO_SEG
2304                                   && !forw_ref
2305                                   && !(input->eaflags & EAF_WORDOFFS)))
2306                             mod = 1;
2307                         else
2308                             mod = 2;
2309                     }
2310
2311                     output->sib_present = false;
2312                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2313                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2314                 } else {
2315                     /* we need a SIB */
2316                     int mod, scale, index, base;
2317
2318                     if (it == -1)
2319                         index = 4, s = 1;
2320                     else
2321                         index = (it & 7);
2322
2323                     switch (s) {
2324                     case 1:
2325                         scale = 0;
2326                         break;
2327                     case 2:
2328                         scale = 1;
2329                         break;
2330                     case 4:
2331                         scale = 2;
2332                         break;
2333                     case 8:
2334                         scale = 3;
2335                         break;
2336                     default:   /* then what the smeg is it? */
2337                         return NULL;    /* panic */
2338                     }
2339
2340                     if (bt == -1) {
2341                         base = 5;
2342                         mod = 0;
2343                     } else {
2344                         base = (bt & 7);
2345                         if (base != REG_NUM_EBP && o == 0 &&
2346                                     seg == NO_SEG && !forw_ref &&
2347                                     !(input->eaflags &
2348                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2349                             mod = 0;
2350                         else if (input->eaflags & EAF_BYTEOFFS ||
2351                                  (o >= -128 && o <= 127 && seg == NO_SEG
2352                                   && !forw_ref
2353                                   && !(input->eaflags & EAF_WORDOFFS)))
2354                             mod = 1;
2355                         else
2356                             mod = 2;
2357                     }
2358
2359                     output->sib_present = true;
2360                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2361                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2362                     output->sib = (scale << 6) | (index << 3) | base;
2363                 }
2364             } else {            /* it's 16-bit */
2365                 int mod, rm;
2366
2367                 /* check for 64-bit long mode */
2368                 if (addrbits == 64)
2369                     return NULL;
2370
2371                 /* check all registers are BX, BP, SI or DI */
2372                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2373                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2374                                        && i != R_SI && i != R_DI))
2375                     return NULL;
2376
2377                 /* ensure the user didn't specify DWORD/QWORD */
2378                 if (input->disp_size == 32 || input->disp_size == 64)
2379                     return NULL;
2380
2381                 if (s != 1 && i != -1)
2382                     return NULL;        /* no can do, in 16-bit EA */
2383                 if (b == -1 && i != -1) {
2384                     int tmp = b;
2385                     b = i;
2386                     i = tmp;
2387                 }               /* swap */
2388                 if ((b == R_SI || b == R_DI) && i != -1) {
2389                     int tmp = b;
2390                     b = i;
2391                     i = tmp;
2392                 }
2393                 /* have BX/BP as base, SI/DI index */
2394                 if (b == i)
2395                     return NULL;        /* shouldn't ever happen, in theory */
2396                 if (i != -1 && b != -1 &&
2397                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2398                     return NULL;        /* invalid combinations */
2399                 if (b == -1)    /* pure offset: handled above */
2400                     return NULL;        /* so if it gets to here, panic! */
2401
2402                 rm = -1;
2403                 if (i != -1)
2404                     switch (i * 256 + b) {
2405                     case R_SI * 256 + R_BX:
2406                         rm = 0;
2407                         break;
2408                     case R_DI * 256 + R_BX:
2409                         rm = 1;
2410                         break;
2411                     case R_SI * 256 + R_BP:
2412                         rm = 2;
2413                         break;
2414                     case R_DI * 256 + R_BP:
2415                         rm = 3;
2416                         break;
2417                 } else
2418                     switch (b) {
2419                     case R_SI:
2420                         rm = 4;
2421                         break;
2422                     case R_DI:
2423                         rm = 5;
2424                         break;
2425                     case R_BP:
2426                         rm = 6;
2427                         break;
2428                     case R_BX:
2429                         rm = 7;
2430                         break;
2431                     }
2432                 if (rm == -1)   /* can't happen, in theory */
2433                     return NULL;        /* so panic if it does */
2434
2435                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2436                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2437                     mod = 0;
2438                 else if (input->eaflags & EAF_BYTEOFFS ||
2439                          (o >= -128 && o <= 127 && seg == NO_SEG
2440                           && !forw_ref
2441                           && !(input->eaflags & EAF_WORDOFFS)))
2442                     mod = 1;
2443                 else
2444                     mod = 2;
2445
2446                 output->sib_present = false;    /* no SIB - it's 16-bit */
2447                 output->bytes = mod;    /* bytes of offset needed */
2448                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2449             }
2450         }
2451     }
2452
2453     output->size = 1 + output->sib_present + output->bytes;
2454     return output;
2455 }
2456
2457 static void add_asp(insn *ins, int addrbits)
2458 {
2459     int j, valid;
2460     int defdisp;
2461
2462     valid = (addrbits == 64) ? 64|32 : 32|16;
2463
2464     switch (ins->prefixes[PPS_ASIZE]) {
2465     case P_A16:
2466         valid &= 16;
2467         break;
2468     case P_A32:
2469         valid &= 32;
2470         break;
2471     case P_A64:
2472         valid &= 64;
2473         break;
2474     case P_ASP:
2475         valid &= (addrbits == 32) ? 16 : 32;
2476         break;
2477     default:
2478         break;
2479     }
2480
2481     for (j = 0; j < ins->operands; j++) {
2482         if (!(MEMORY & ~ins->oprs[j].type)) {
2483             int32_t i, b;
2484
2485             /* Verify as Register */
2486             if (ins->oprs[j].indexreg < EXPR_REG_START
2487                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2488                 i = 0;
2489             else
2490                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2491
2492             /* Verify as Register */
2493             if (ins->oprs[j].basereg < EXPR_REG_START
2494                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2495                 b = 0;
2496             else
2497                 b = nasm_reg_flags[ins->oprs[j].basereg];
2498
2499             if (ins->oprs[j].scale == 0)
2500                 i = 0;
2501
2502             if (!i && !b) {
2503                 int ds = ins->oprs[j].disp_size;
2504                 if ((addrbits != 64 && ds > 8) ||
2505                     (addrbits == 64 && ds == 16))
2506                     valid &= ds;
2507             } else {
2508                 if (!(REG16 & ~b))
2509                     valid &= 16;
2510                 if (!(REG32 & ~b))
2511                     valid &= 32;
2512                 if (!(REG64 & ~b))
2513                     valid &= 64;
2514
2515                 if (!(REG16 & ~i))
2516                     valid &= 16;
2517                 if (!(REG32 & ~i))
2518                     valid &= 32;
2519                 if (!(REG64 & ~i))
2520                     valid &= 64;
2521             }
2522         }
2523     }
2524
2525     if (valid & addrbits) {
2526         ins->addr_size = addrbits;
2527     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2528         /* Add an address size prefix */
2529         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2530         ins->prefixes[PPS_ASIZE] = pref;
2531         ins->addr_size = (addrbits == 32) ? 16 : 32;
2532     } else {
2533         /* Impossible... */
2534         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2535         ins->addr_size = addrbits; /* Error recovery */
2536     }
2537
2538     defdisp = ins->addr_size == 16 ? 16 : 32;
2539
2540     for (j = 0; j < ins->operands; j++) {
2541         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2542             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2543             != ins->addr_size) {
2544             /* mem_offs sizes must match the address size; if not,
2545                strip the MEM_OFFS bit and match only EA instructions */
2546             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2547         }
2548     }
2549 }