assemble.c

   1 /* assemble.c   code generation for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the license given in the file "LICENSE"
   6  * distributed in the NASM archive.
   7  *
   8  * the actual codes (C syntax, i.e. octal):
   9  * \0            - terminates the code. (Unless it's a literal of course.)
  10  * \1..\4        - that many literal bytes follow in the code stream
  11  * \5            - add 4 to the primary operand number (b, low octdigit)
  12  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  13  * \7            - add 4 to both the primary and the secondary operand number
  14  * \10..\13      - a literal byte follows in the code stream, to be added
  15  *                 to the register value of operand 0..3
  16  * \14..\17      - a signed byte immediate operand, from operand 0..3
  17  * \20..\23      - a byte immediate operand, from operand 0..3
  18  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  19  * \30..\33      - a word immediate operand, from operand 0..3
  20  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  21  *                 assembly mode or the operand-size override on the operand
  22  * \40..\43      - a long immediate operand, from operand 0..3
  23  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  24  *                 depending on the address size of the instruction.
  25  * \50..\53      - a byte relative operand, from operand 0..3
  26  * \54..\57      - a qword immediate operand, from operand 0..3
  27  * \60..\63      - a word relative operand, from operand 0..3
  28  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  29  *                 assembly mode or the operand-size override on the operand
  30  * \70..\73      - a long relative operand, from operand 0..3
  31  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  32  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  33  *                 field the register value of operand b.
  34  * \140..\143    - an immediate word or signed byte for operand 0..3
  35  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  36  *                  is a signed byte rather than a word.  Opcode byte follows.
  37  * \150..\153    - an immediate dword or signed byte for operand 0..3
  38  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  39  *                  is a signed byte rather than a dword.  Opcode byte follows.
  40  * \160..\163    - this instruction uses DREX rather than REX, with the
  41  *                 OC0 field set to 0, and the dest field taken from
  42  *                 operand 0..3.
  43  * \164..\167    - this instruction uses DREX rather than REX, with the
  44  *                 OC0 field set to 1, and the dest field taken from
  45  *                 operand 0..3.
  46  * \171          - placement of DREX suffix in the absence of an EA
  47  * \172\ab       - the register number from operand a in bits 7..4, with
  48  *                 the 4-bit immediate from operand b in bits 3..0.
  49  * \173\xab      - the register number from operand a in bits 7..4, with
  50  *                 the value b in bits 3..0.
  51  * \174\a        - the register number from operand a in bits 7..4, and
  52  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  53  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  54  *                 field equal to digit b.
  55  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  56  *                 is not equal to the truncated and sign-extended 32-bit
  57  *                 operand; used for 32-bit immediates in 64-bit mode.
  58  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  59  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  60  *                 V field taken from operand 0..3.
  61  * \270          - this instruction uses VEX/XOP rather than REX, with the
  62  *                 V field set to 1111b.
  63  *
  64  * VEX/XOP prefixes are followed by the sequence:
  65  * \tmm\wlp        where mm is the M field; and wlp is:
  66  *                 00 0ww lpp
  67  *                 [w0] ww = 0 for W = 0
  68  *                 [w1] ww = 1 for W = 1
  69  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  70  *                 [ww] ww = 3 for W used as REX.W
  71  *
  72  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
  73  *
  74  * \274..\277    - a signed byte immediate operand, from operand 0..3,
  75  *                 which is to be extended to the operand size.
  76  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  77  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  78  * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
  79  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  80  * \314          - (disassembler only) invalid with REX.B
  81  * \315          - (disassembler only) invalid with REX.X
  82  * \316          - (disassembler only) invalid with REX.R
  83  * \317          - (disassembler only) invalid with REX.W
  84  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
  85  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
  86  * \322          - indicates that this instruction is only valid when the
  87  *                 operand size is the default (instruction to disassembler,
  88  *                 generates no code in the assembler)
  89  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
  90  * \324          - indicates 64-bit operand size requiring REX prefix.
  91  * \330          - a literal byte follows in the code stream, to be added
  92  *                 to the condition code value of the instruction.
  93  * \331          - instruction not valid with REP prefix.  Hint for
  94  *                 disassembler only; for SSE instructions.
  95  * \332          - REP prefix (0xF2 byte) used as opcode extension.
  96  * \333          - REP prefix (0xF3 byte) used as opcode extension.
  97  * \334          - LOCK prefix used instead of REX.R
  98  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
   99  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  100  * \337          - force a REPNE prefix (0xF2) even if not specified.
 101  *                 \336-\337 are still listed as prefixes in the disassembler.
 102  * \340          - reserve <operand 0> bytes of uninitialized storage.
 103  *                 Operand 0 had better be a segmentless constant.
 104  * \341          - this instruction needs a WAIT "prefix"
 105  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 106  *                 (POP is never used for CS) depending on operand 0
 107  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 108  *                 on operand 0
 109  * \360          - no SSE prefix (== \364\331)
 110  * \361          - 66 SSE prefix (== \366\331)
 111  * \362          - F2 SSE prefix (== \364\332)
 112  * \363          - F3 SSE prefix (== \364\333)
 113  * \364          - operand-size prefix (0x66) not permitted
 114  * \365          - address-size prefix (0x67) not permitted
 115  * \366          - operand-size prefix (0x66) used as opcode extension
 116  * \367          - address-size prefix (0x67) used as opcode extension
 117  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 118  *                 370 is used for Jcc, 371 is used for JMP.
 119  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 120  *                 used for conditional jump over longer jump
 121  */
 122
 123 #include "compiler.h"
 124
 125 #include <stdio.h>
 126 #include <string.h>
 127 #include <inttypes.h>
 128
 129 #include "nasm.h"
 130 #include "nasmlib.h"
 131 #include "assemble.h"
 132 #include "insns.h"
 133 #include "tables.h"
 134
 135 typedef struct {
 136     int sib_present;                 /* is a SIB byte necessary? */
 137     int bytes;                       /* # of bytes of offset needed */
 138     int size;                        /* lazy - this is sib+bytes+1 */
 139     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 140 } ea;
 141
/*
 * File-scope state shared by the routines in this file.  assemble()
 * stores its cp, error, output and listgen arguments here on every
 * call; insn_size() stores only cp and error.
 */
static uint32_t cpu;            /* cpu level received from nasm.c */
static efunc errfunc;           /* error reporting callback */
static struct ofmt *outfmt;     /* output format handed to assemble() */
static ListGen *list;           /* listing generator handed to assemble() */
 146
/* Forward declarations of file-local helpers. */
static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
static void gencode(int32_t segment, int64_t offset, int bits,
                    insn * ins, const struct itemplate *temp,
                    int64_t insn_end);
static int matches(const struct itemplate *, insn *, int bits);
static int32_t regflag(const operand *);
static int32_t regval(const operand *);
static int rexflags(int, int32_t, int);
static int op_rexflags(const operand *, int);
static ea *process_ea(operand *, ea *, int, int, int, int32_t);
static void add_asp(insn *, int);
 158
 159 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 160 {
 161     return ins->prefixes[pos] == prefix;
 162 }
 163
 164 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 165 {
 166     if (ins->prefixes[pos])
 167         errfunc(ERR_NONFATAL, "invalid %s prefix",
 168                 prefix_name(ins->prefixes[pos]));
 169 }
 170
 171 static const char *size_name(int size)
 172 {
 173     switch (size) {
 174     case 1:
 175         return "byte";
 176     case 2:
 177         return "word";
 178     case 4:
 179         return "dword";
 180     case 8:
 181         return "qword";
 182     case 10:
 183         return "tword";
 184     case 16:
 185         return "oword";
 186     case 32:
 187         return "yword";
 188     default:
 189         return "???";
 190     }
 191 }
 192
 193 static void warn_overflow(int size, const struct operand *o)
 194 {
 195     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 196         int64_t lim = ((int64_t)1 << (size*8))-1;
 197         int64_t data = o->offset;
 198
 199         if (data < ~lim || data > lim)
 200             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 201                     "%s data exceeds bounds", size_name(size));
 202     }
 203 }
 204 /*
 205  * This routine wrappers the real output format's output routine,
 206  * in order to pass a copy of the data off to the listing file
 207  * generator at the same time.
 208  */
 209 static void out(int64_t offset, int32_t segto, const void *data,
 210                 enum out_type type, uint64_t size,
 211                 int32_t segment, int32_t wrt)
 212 {
 213     static int32_t lineno = 0;     /* static!!! */
 214     static char *lnfname = NULL;
 215     uint8_t p[8];
 216
 217     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 218         /*
 219          * This is a non-relocated address, and we're going to
 220          * convert it into RAWDATA format.
 221          */
 222         uint8_t *q = p;
 223
 224         if (size > 8) {
 225             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 226             return;
 227         }
 228
 229         WRITEADDR(q, *(int64_t *)data, size);
 230         data = p;
 231         type = OUT_RAWDATA;
 232     }
 233
 234     list->output(offset, data, type, size);
 235
 236     /*
 237      * this call to src_get determines when we call the
 238      * debug-format-specific "linenum" function
 239      * it updates lineno and lnfname to the current values
 240      * returning 0 if "same as last time", -2 if lnfname
 241      * changed, and the amount by which lineno changed,
 242      * if it did. thus, these variables must be static
 243      */
 244
 245     if (src_get(&lineno, &lnfname)) {
 246         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 247     }
 248
 249     outfmt->output(segto, data, type, size, segment, wrt);
 250 }
 251
 252 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 253                      insn * ins, const uint8_t *code)
 254 {
 255     int64_t isize;
 256     uint8_t c = code[0];
 257
 258     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 259         return false;
 260     if (!optimizing)
 261         return false;
 262     if (optimizing < 0 && c == 0371)
 263         return false;
 264
 265     isize = calcsize(segment, offset, bits, ins, code);
 266
 267     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 268         /* Be optimistic in pass 1 */
 269         return true;
 270
 271     if (ins->oprs[0].segment != segment)
 272         return false;
 273
 274     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 275     return (isize >= -128 && isize <= 127); /* is it byte size? */
 276 }
 277
 278 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 279               insn * instruction, struct ofmt *output, efunc error,
 280               ListGen * listgen)
 281 {
 282     const struct itemplate *temp;
 283     int j;
 284     int size_prob;
 285     int64_t insn_end;
 286     int32_t itimes;
 287     int64_t start = offset;
 288     int64_t wsize = 0;             /* size for DB etc. */
 289
 290     errfunc = error;            /* to pass to other functions */
 291     cpu = cp;
 292     outfmt = output;            /* likewise */
 293     list = listgen;             /* and again */
 294
 295     switch (instruction->opcode) {
 296     case -1:
 297         return 0;
 298     case I_DB:
 299         wsize = 1;
 300         break;
 301     case I_DW:
 302         wsize = 2;
 303         break;
 304     case I_DD:
 305         wsize = 4;
 306         break;
 307     case I_DQ:
 308         wsize = 8;
 309         break;
 310     case I_DT:
 311         wsize = 10;
 312         break;
 313     case I_DO:
 314         wsize = 16;
 315         break;
 316     case I_DY:
 317         wsize = 32;
 318         break;
 319     default:
 320         break;
 321     }
 322
 323     if (wsize) {
 324         extop *e;
 325         int32_t t = instruction->times;
 326         if (t < 0)
 327             errfunc(ERR_PANIC,
 328                     "instruction->times < 0 (%ld) in assemble()", t);
 329
 330         while (t--) {           /* repeat TIMES times */
 331             for (e = instruction->eops; e; e = e->next) {
 332                 if (e->type == EOT_DB_NUMBER) {
 333                     if (wsize == 1) {
 334                         if (e->segment != NO_SEG)
 335                             errfunc(ERR_NONFATAL,
 336                                     "one-byte relocation attempted");
 337                         else {
 338                             uint8_t out_byte = e->offset;
 339                             out(offset, segment, &out_byte,
 340                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 341                         }
 342                     } else if (wsize > 8) {
 343                         errfunc(ERR_NONFATAL,
 344                                 "integer supplied to a DT, DO or DY"
 345                                 " instruction");
 346                     } else
 347                         out(offset, segment, &e->offset,
 348                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 349                     offset += wsize;
 350                 } else if (e->type == EOT_DB_STRING ||
 351                            e->type == EOT_DB_STRING_FREE) {
 352                     int align;
 353
 354                     out(offset, segment, e->stringval,
 355                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 356                     align = e->stringlen % wsize;
 357
 358                     if (align) {
 359                         align = wsize - align;
 360                         out(offset, segment, zero_buffer,
 361                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 362                     }
 363                     offset += e->stringlen + align;
 364                 }
 365             }
 366             if (t > 0 && t == instruction->times - 1) {
 367                 /*
 368                  * Dummy call to list->output to give the offset to the
 369                  * listing module.
 370                  */
 371                 list->output(offset, NULL, OUT_RAWDATA, 0);
 372                 list->uplevel(LIST_TIMES);
 373             }
 374         }
 375         if (instruction->times > 1)
 376             list->downlevel(LIST_TIMES);
 377         return offset - start;
 378     }
 379
 380     if (instruction->opcode == I_INCBIN) {
 381         const char *fname = instruction->eops->stringval;
 382         FILE *fp;
 383
 384         fp = fopen(fname, "rb");
 385         if (!fp) {
 386             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 387                   fname);
 388         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 389             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 390                   fname);
 391         } else {
 392             static char buf[4096];
 393             size_t t = instruction->times;
 394             size_t base = 0;
 395             size_t len;
 396
 397             len = ftell(fp);
 398             if (instruction->eops->next) {
 399                 base = instruction->eops->next->offset;
 400                 len -= base;
 401                 if (instruction->eops->next->next &&
 402                     len > (size_t)instruction->eops->next->next->offset)
 403                     len = (size_t)instruction->eops->next->next->offset;
 404             }
 405             /*
 406              * Dummy call to list->output to give the offset to the
 407              * listing module.
 408              */
 409             list->output(offset, NULL, OUT_RAWDATA, 0);
 410             list->uplevel(LIST_INCBIN);
 411             while (t--) {
 412                 size_t l;
 413
 414                 fseek(fp, base, SEEK_SET);
 415                 l = len;
 416                 while (l > 0) {
 417                     int32_t m =
 418                         fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
 419                               fp);
 420                     if (!m) {
 421                         /*
 422                          * This shouldn't happen unless the file
 423                          * actually changes while we are reading
 424                          * it.
 425                          */
 426                         error(ERR_NONFATAL,
 427                               "`incbin': unexpected EOF while"
 428                               " reading file `%s'", fname);
 429                         t = 0;  /* Try to exit cleanly */
 430                         break;
 431                     }
 432                     out(offset, segment, buf, OUT_RAWDATA, m,
 433                         NO_SEG, NO_SEG);
 434                     l -= m;
 435                 }
 436             }
 437             list->downlevel(LIST_INCBIN);
 438             if (instruction->times > 1) {
 439                 /*
 440                  * Dummy call to list->output to give the offset to the
 441                  * listing module.
 442                  */
 443                 list->output(offset, NULL, OUT_RAWDATA, 0);
 444                 list->uplevel(LIST_TIMES);
 445                 list->downlevel(LIST_TIMES);
 446             }
 447             fclose(fp);
 448             return instruction->times * len;
 449         }
 450         return 0;               /* if we're here, there's an error */
 451     }
 452
 453     /* Check to see if we need an address-size prefix */
 454     add_asp(instruction, bits);
 455
 456     size_prob = 0;
 457
 458     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 459         int m = matches(temp, instruction, bits);
 460         if (m == 100 ||
 461             (m == 99 && jmp_match(segment, offset, bits,
 462                                   instruction, temp->code))) {
 463             /* Matches! */
 464             int64_t insn_size = calcsize(segment, offset, bits,
 465                                       instruction, temp->code);
 466             itimes = instruction->times;
 467             if (insn_size < 0)  /* shouldn't be, on pass two */
 468                 error(ERR_PANIC, "errors made it through from pass one");
 469             else
 470                 while (itimes--) {
 471                     for (j = 0; j < MAXPREFIX; j++) {
 472                         uint8_t c = 0;
 473                         switch (instruction->prefixes[j]) {
 474                         case P_WAIT:
 475                             c = 0x9B;
 476                             break;
 477                         case P_LOCK:
 478                             c = 0xF0;
 479                             break;
 480                         case P_REPNE:
 481                         case P_REPNZ:
 482                             c = 0xF2;
 483                             break;
 484                         case P_REPE:
 485                         case P_REPZ:
 486                         case P_REP:
 487                             c = 0xF3;
 488                             break;
 489                         case R_CS:
 490                             if (bits == 64) {
 491                                 error(ERR_WARNING | ERR_PASS2,
 492                                       "cs segment base generated, but will be ignored in 64-bit mode");
 493                             }
 494                             c = 0x2E;
 495                             break;
 496                         case R_DS:
 497                             if (bits == 64) {
 498                                 error(ERR_WARNING | ERR_PASS2,
 499                                       "ds segment base generated, but will be ignored in 64-bit mode");
 500                             }
 501                             c = 0x3E;
 502                             break;
 503                         case R_ES:
 504                            if (bits == 64) {
 505                                 error(ERR_WARNING | ERR_PASS2,
 506                                       "es segment base generated, but will be ignored in 64-bit mode");
 507                            }
 508                             c = 0x26;
 509                             break;
 510                         case R_FS:
 511                             c = 0x64;
 512                             break;
 513                         case R_GS:
 514                             c = 0x65;
 515                             break;
 516                         case R_SS:
 517                             if (bits == 64) {
 518                                 error(ERR_WARNING | ERR_PASS2,
 519                                       "ss segment base generated, but will be ignored in 64-bit mode");
 520                             }
 521                             c = 0x36;
 522                             break;
 523                         case R_SEGR6:
 524                         case R_SEGR7:
 525                             error(ERR_NONFATAL,
 526                                   "segr6 and segr7 cannot be used as prefixes");
 527                             break;
 528                         case P_A16:
 529                             if (bits == 64) {
 530                                 error(ERR_NONFATAL,
 531                                       "16-bit addressing is not supported "
 532                                       "in 64-bit mode");
 533                             } else if (bits != 16)
 534                                 c = 0x67;
 535                             break;
 536                         case P_A32:
 537                             if (bits != 32)
 538                                 c = 0x67;
 539                             break;
 540                         case P_A64:
 541                             if (bits != 64) {
 542                                 error(ERR_NONFATAL,
 543                                       "64-bit addressing is only supported "
 544                                       "in 64-bit mode");
 545                             }
 546                             break;
 547                         case P_ASP:
 548                             c = 0x67;
 549                             break;
 550                         case P_O16:
 551                             if (bits != 16)
 552                                 c = 0x66;
 553                             break;
 554                         case P_O32:
 555                             if (bits == 16)
 556                                 c = 0x66;
 557                             break;
 558                         case P_O64:
 559                             /* REX.W */
 560                             break;
 561                         case P_OSP:
 562                             c = 0x66;
 563                             break;
 564                         case P_none:
 565                             break;
 566                         default:
 567                             error(ERR_PANIC, "invalid instruction prefix");
 568                         }
 569                         if (c != 0) {
 570                             out(offset, segment, &c, OUT_RAWDATA, 1,
 571                                 NO_SEG, NO_SEG);
 572                             offset++;
 573                         }
 574                     }
 575                     insn_end = offset + insn_size;
 576                     gencode(segment, offset, bits, instruction,
 577                             temp, insn_end);
 578                     offset += insn_size;
 579                     if (itimes > 0 && itimes == instruction->times - 1) {
 580                         /*
 581                          * Dummy call to list->output to give the offset to the
 582                          * listing module.
 583                          */
 584                         list->output(offset, NULL, OUT_RAWDATA, 0);
 585                         list->uplevel(LIST_TIMES);
 586                     }
 587                 }
 588             if (instruction->times > 1)
 589                 list->downlevel(LIST_TIMES);
 590             return offset - start;
 591         } else if (m > 0 && m > size_prob) {
 592             size_prob = m;
 593         }
 594     }
 595
 596     if (temp->opcode == -1) {   /* didn't match any instruction */
 597         switch (size_prob) {
 598         case 1:
 599             error(ERR_NONFATAL, "operation size not specified");
 600             break;
 601         case 2:
 602             error(ERR_NONFATAL, "mismatch in operand sizes");
 603             break;
 604         case 3:
 605             error(ERR_NONFATAL, "no instruction for this cpu level");
 606             break;
 607         case 4:
 608             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 609                   bits);
 610             break;
 611         default:
 612             error(ERR_NONFATAL,
 613                   "invalid combination of opcode and operands");
 614             break;
 615         }
 616     }
 617     return 0;
 618 }
 619
 620 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 621                insn * instruction, efunc error)
 622 {
 623     const struct itemplate *temp;
 624
 625     errfunc = error;            /* to pass to other functions */
 626     cpu = cp;
 627
 628     if (instruction->opcode == -1)
 629         return 0;
 630
 631     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 632         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 633         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 634         instruction->opcode == I_DY) {
 635         extop *e;
 636         int32_t isize, osize, wsize = 0;   /* placate gcc */
 637
 638         isize = 0;
 639         switch (instruction->opcode) {
 640         case I_DB:
 641             wsize = 1;
 642             break;
 643         case I_DW:
 644             wsize = 2;
 645             break;
 646         case I_DD:
 647             wsize = 4;
 648             break;
 649         case I_DQ:
 650             wsize = 8;
 651             break;
 652         case I_DT:
 653             wsize = 10;
 654             break;
 655         case I_DO:
 656             wsize = 16;
 657             break;
 658         case I_DY:
 659             wsize = 32;
 660             break;
 661         default:
 662             break;
 663         }
 664
 665         for (e = instruction->eops; e; e = e->next) {
 666             int32_t align;
 667
 668             osize = 0;
 669             if (e->type == EOT_DB_NUMBER)
 670                 osize = 1;
 671             else if (e->type == EOT_DB_STRING ||
 672                      e->type == EOT_DB_STRING_FREE)
 673                 osize = e->stringlen;
 674
 675             align = (-osize) % wsize;
 676             if (align < 0)
 677                 align += wsize;
 678             isize += osize + align;
 679         }
 680         return isize * instruction->times;
 681     }
 682
 683     if (instruction->opcode == I_INCBIN) {
 684         const char *fname = instruction->eops->stringval;
 685         FILE *fp;
 686         size_t len;
 687
 688         fp = fopen(fname, "rb");
 689         if (!fp)
 690             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 691                   fname);
 692         else if (fseek(fp, 0L, SEEK_END) < 0)
 693             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 694                   fname);
 695         else {
 696             len = ftell(fp);
 697             fclose(fp);
 698             if (instruction->eops->next) {
 699                 len -= instruction->eops->next->offset;
 700                 if (instruction->eops->next->next &&
 701                     len > (size_t)instruction->eops->next->next->offset) {
 702                     len = (size_t)instruction->eops->next->next->offset;
 703                 }
 704             }
 705             return instruction->times * len;
 706         }
 707         return 0;               /* if we're here, there's an error */
 708     }
 709
 710     /* Check to see if we need an address-size prefix */
 711     add_asp(instruction, bits);
 712
 713     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 714         int m = matches(temp, instruction, bits);
 715         if (m == 100 ||
 716             (m == 99 && jmp_match(segment, offset, bits,
 717                                   instruction, temp->code))) {
 718             /* we've matched an instruction. */
 719             int64_t isize;
 720             const uint8_t *codes = temp->code;
 721             int j;
 722
 723             isize = calcsize(segment, offset, bits, instruction, codes);
 724             if (isize < 0)
 725                 return -1;
 726             for (j = 0; j < MAXPREFIX; j++) {
 727                 switch (instruction->prefixes[j]) {
 728                 case P_A16:
 729                     if (bits != 16)
 730                         isize++;
 731                     break;
 732                 case P_A32:
 733                     if (bits != 32)
 734                         isize++;
 735                     break;
 736                 case P_O16:
 737                     if (bits != 16)
 738                         isize++;
 739                     break;
 740                 case P_O32:
 741                     if (bits == 16)
 742                         isize++;
 743                     break;
 744                 case P_A64:
 745                 case P_O64:
 746                 case P_none:
 747                     break;
 748                 default:
 749                     isize++;
 750                     break;
 751                 }
 752             }
 753             return isize * instruction->times;
 754         }
 755     }
 756     return -1;                  /* didn't match any instruction */
 757 }
 758
 759 static bool possible_sbyte(operand *o)
 760 {
 761     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 762         !(o->opflags & OPFLAG_UNKNOWN) &&
 763         optimizing >= 0 && !(o->type & STRICT);
 764 }
 765
 766 /* check that opn[op]  is a signed byte of size 16 or 32 */
 767 static bool is_sbyte16(operand *o)
 768 {
 769     int16_t v;
 770
 771     if (!possible_sbyte(o))
 772         return false;
 773
 774     v = o->offset;
 775     return v >= -128 && v <= 127;
 776 }
 777
 778 static bool is_sbyte32(operand *o)
 779 {
 780     int32_t v;
 781
 782     if (!possible_sbyte(o))
 783         return false;
 784
 785     v = o->offset;
 786     return v >= -128 && v <= 127;
 787 }
 788
 789 /* Common construct */
 790 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 791
 792 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 793                         insn * ins, const uint8_t *codes)
 794 {
 795     int64_t length = 0;
 796     uint8_t c;
 797     int rex_mask = ~0;
 798     int op1, op2;
 799     struct operand *opx;
 800     uint8_t opex = 0;
 801
 802     ins->rex = 0;               /* Ensure REX is reset */
 803
 804     if (ins->prefixes[PPS_OSIZE] == P_O64)
 805         ins->rex |= REX_W;
 806
 807     (void)segment;              /* Don't warn that this parameter is unused */
 808     (void)offset;               /* Don't warn that this parameter is unused */
 809
 810     while (*codes) {
 811         c = *codes++;
 812         op1 = (c & 3) + ((opex & 1) << 2);
 813         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 814         opx = &ins->oprs[op1];
 815         opex = 0;               /* For the next iteration */
 816
 817         switch (c) {
 818         case 01:
 819         case 02:
 820         case 03:
 821         case 04:
 822             codes += c, length += c;
 823             break;
 824
 825         case 05:
 826         case 06:
 827         case 07:
 828             opex = c;
 829             break;
 830
 831         case4(010):
 832             ins->rex |=
 833                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 834             codes++, length++;
 835             break;
 836
 837         case4(014):
 838         case4(020):
 839         case4(024):
 840             length++;
 841             break;
 842
 843         case4(030):
 844             length += 2;
 845             break;
 846
 847         case4(034):
 848             if (opx->type & (BITS16 | BITS32 | BITS64))
 849                 length += (opx->type & BITS16) ? 2 : 4;
 850             else
 851                 length += (bits == 16) ? 2 : 4;
 852             break;
 853
 854         case4(040):
 855             length += 4;
 856             break;
 857
 858         case4(044):
 859             length += ins->addr_size >> 3;
 860             break;
 861
 862         case4(050):
 863             length++;
 864             break;
 865
 866         case4(054):
 867             length += 8; /* MOV reg64/imm */
 868             break;
 869
 870         case4(060):
 871             length += 2;
 872             break;
 873
 874         case4(064):
 875             if (opx->type & (BITS16 | BITS32 | BITS64))
 876                 length += (opx->type & BITS16) ? 2 : 4;
 877             else
 878                 length += (bits == 16) ? 2 : 4;
 879             break;
 880
 881         case4(070):
 882             length += 4;
 883             break;
 884
 885         case4(074):
 886             length += 2;
 887             break;
 888
 889         case4(0140):
 890             length += is_sbyte16(opx) ? 1 : 2;
 891             break;
 892
 893         case4(0144):
 894             codes++;
 895             length++;
 896             break;
 897
 898         case4(0150):
 899             length += is_sbyte32(opx) ? 1 : 4;
 900             break;
 901
 902         case4(0154):
 903             codes++;
 904             length++;
 905             break;
 906
 907         case4(0160):
 908             length++;
 909             ins->rex |= REX_D;
 910             ins->drexdst = regval(opx);
 911             break;
 912
 913         case4(0164):
 914             length++;
 915             ins->rex |= REX_D|REX_OC;
 916             ins->drexdst = regval(opx);
 917             break;
 918
 919         case 0171:
 920             break;
 921
 922         case 0172:
 923         case 0173:
 924         case 0174:
 925             codes++;
 926             length++;
 927             break;
 928
 929         case4(0250):
 930             length += is_sbyte32(opx) ? 1 : 4;
 931             break;
 932
 933         case4(0254):
 934             length += 4;
 935             break;
 936
 937         case4(0260):
 938             ins->rex |= REX_V;
 939             ins->drexdst = regval(opx);
 940             ins->vex_cm = *codes++;
 941             ins->vex_wlp = *codes++;
 942             break;
 943
 944         case 0270:
 945             ins->rex |= REX_V;
 946             ins->drexdst = 0;
 947             ins->vex_cm = *codes++;
 948             ins->vex_wlp = *codes++;
 949             break;
 950
 951         case4(0274):
 952             length++;
 953             break;
 954
 955         case4(0300):
 956             break;
 957
 958         case 0310:
 959             if (bits == 64)
 960                 return -1;
 961             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 962             break;
 963
 964         case 0311:
 965             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 966             break;
 967
 968         case 0312:
 969             break;
 970
 971         case 0313:
 972             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 973                 has_prefix(ins, PPS_ASIZE, P_A32))
 974                 return -1;
 975             break;
 976
 977         case4(0314):
 978             break;
 979
 980         case 0320:
 981             length += (bits != 16);
 982             break;
 983
 984         case 0321:
 985             length += (bits == 16);
 986             break;
 987
 988         case 0322:
 989             break;
 990
 991         case 0323:
 992             rex_mask &= ~REX_W;
 993             break;
 994
 995         case 0324:
 996             ins->rex |= REX_W;
 997             break;
 998
 999         case 0330:
1000             codes++, length++;
1001             break;
1002
1003         case 0331:
1004             break;
1005
1006         case 0332:
1007         case 0333:
1008             length++;
1009             break;
1010
1011         case 0334:
1012             ins->rex |= REX_L;
1013             break;
1014
1015         case 0335:
1016             break;
1017
1018         case 0336:
1019             if (!ins->prefixes[PPS_LREP])
1020                 ins->prefixes[PPS_LREP] = P_REP;
1021             break;
1022
1023         case 0337:
1024             if (!ins->prefixes[PPS_LREP])
1025                 ins->prefixes[PPS_LREP] = P_REPNE;
1026             break;
1027
1028         case 0340:
1029             if (ins->oprs[0].segment != NO_SEG)
1030                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1031                         " quantity of BSS space");
1032             else
1033                 length += ins->oprs[0].offset;
1034             break;
1035
1036         case 0341:
1037             if (!ins->prefixes[PPS_WAIT])
1038                 ins->prefixes[PPS_WAIT] = P_WAIT;
1039             break;
1040
1041         case4(0344):
1042             length++;
1043             break;
1044
1045         case 0360:
1046             break;
1047
1048         case 0361:
1049         case 0362:
1050         case 0363:
1051             length++;
1052             break;
1053
1054         case 0364:
1055         case 0365:
1056             break;
1057
1058         case 0366:
1059         case 0367:
1060             length++;
1061             break;
1062
1063         case 0370:
1064         case 0371:
1065         case 0372:
1066             break;
1067
1068         case 0373:
1069             length++;
1070             break;
1071
1072         case4(0100):
1073         case4(0110):
1074         case4(0120):
1075         case4(0130):
1076         case4(0200):
1077         case4(0204):
1078         case4(0210):
1079         case4(0214):
1080         case4(0220):
1081         case4(0224):
1082         case4(0230):
1083         case4(0234):
1084             {
1085                 ea ea_data;
1086                 int rfield;
1087                 int32_t rflags;
1088                 struct operand *opy = &ins->oprs[op2];
1089
1090                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1091
1092                 if (c <= 0177) {
1093                     /* pick rfield from operand b (opx) */
1094                     rflags = regflag(opx);
1095                     rfield = nasm_regvals[opx->basereg];
1096                 } else {
1097                     rflags = 0;
1098                     rfield = c & 7;
1099                 }
1100                 if (!process_ea(opy, &ea_data, bits,
1101                                 ins->addr_size, rfield, rflags)) {
1102                     errfunc(ERR_NONFATAL, "invalid effective address");
1103                     return -1;
1104                 } else {
1105                     ins->rex |= ea_data.rex;
1106                     length += ea_data.size;
1107                 }
1108             }
1109             break;
1110
1111         default:
1112             errfunc(ERR_PANIC, "internal instruction table corrupt"
1113                     ": instruction code \\%o (0x%02X) given", c, c);
1114             break;
1115         }
1116     }
1117
1118     ins->rex &= rex_mask;
1119
1120     if (ins->rex & REX_V) {
1121         int bad32 = REX_R|REX_W|REX_X|REX_B;
1122
1123         if (ins->rex & REX_H) {
1124             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1125             return -1;
1126         }
1127         switch (ins->vex_wlp & 030) {
1128         case 000:
1129         case 020:
1130             ins->rex &= ~REX_W;
1131             break;
1132         case 010:
1133             ins->rex |= REX_W;
1134             bad32 &= ~REX_W;
1135             break;
1136         case 030:
1137             /* Follow REX_W */
1138             break;
1139         }
1140
1141         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1142             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1143             return -1;
1144         }
1145         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1146             length += 3;
1147         else
1148             length += 2;
1149     } else if (ins->rex & REX_D) {
1150         if (ins->rex & REX_H) {
1151             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1152             return -1;
1153         }
1154         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1155                            ins->drexdst > 7)) {
1156             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1157             return -1;
1158         }
1159         length++;
1160     } else if (ins->rex & REX_REAL) {
1161         if (ins->rex & REX_H) {
1162             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1163             return -1;
1164         } else if (bits == 64) {
1165             length++;
1166         } else if ((ins->rex & REX_L) &&
1167                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1168                    cpu >= IF_X86_64) {
1169             /* LOCK-as-REX.R */
1170             assert_no_prefix(ins, PPS_LREP);
1171             length++;
1172         } else {
1173             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1174             return -1;
1175         }
1176     }
1177
1178     return length;
1179 }
1180
/*
 * Emit a REX prefix byte if one is pending.
 *
 * Uses the enclosing function's `ins', `bits', `offset' and `segment'.
 * A byte is written only in 64-bit mode, when ins->rex has at least one
 * REX_REAL bit set and neither REX_D nor REX_V is flagged.  The byte is
 * the REX_REAL bits of ins->rex combined with REX_P; afterwards
 * ins->rex is cleared (so a later invocation emits nothing) and
 * `offset' is advanced by one.
 *
 * Wrapped in do { } while (0) so that `EMIT_REX();' is exactly one
 * statement and is safe as the body of an unbraced if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) &&                              \
            (ins->rex & REX_REAL) && (bits == 64)) {                    \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1188
1189 static void gencode(int32_t segment, int64_t offset, int bits,
1190                     insn * ins, const struct itemplate *temp,
1191                     int64_t insn_end)
1192 {
1193     static char condval[] = {   /* conditional opcodes */
1194         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1195         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1196         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1197     };
1198     uint8_t c;
1199     uint8_t bytes[4];
1200     int64_t size;
1201     int64_t data;
1202     int op1, op2;
1203     struct operand *opx;
1204     const uint8_t *codes = temp->code;
1205     uint8_t opex = 0;
1206
1207     while (*codes) {
1208         c = *codes++;
1209         op1 = (c & 3) + ((opex & 1) << 2);
1210         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1211         opx = &ins->oprs[op1];
1212         opex = 0;               /* For the next iteration */
1213
1214         switch (c) {
1215         case 01:
1216         case 02:
1217         case 03:
1218         case 04:
1219             EMIT_REX();
1220             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1221             codes += c;
1222             offset += c;
1223             break;
1224
1225         case 05:
1226         case 06:
1227         case 07:
1228             opex = c;
1229             break;
1230
1231         case4(010):
1232             EMIT_REX();
1233             bytes[0] = *codes++ + (regval(opx) & 7);
1234             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1235             offset += 1;
1236             break;
1237
1238         case4(014):
1239             /* The test for BITS8 and SBYTE here is intended to avoid
1240                warning on optimizer actions due to SBYTE, while still
1241                warn on explicit BYTE directives.  Also warn, obviously,
1242                if the optimizer isn't enabled. */
1243             if (((opx->type & BITS8) ||
1244                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1245                 (opx->offset < -128 || opx->offset > 127)) {
1246                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1247                         "signed byte value exceeds bounds");
1248             }
1249             if (opx->segment != NO_SEG) {
1250                 data = opx->offset;
1251                 out(offset, segment, &data, OUT_ADDRESS, 1,
1252                     opx->segment, opx->wrt);
1253             } else {
1254                 bytes[0] = opx->offset;
1255                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1256                     NO_SEG);
1257             }
1258             offset += 1;
1259             break;
1260
1261         case4(020):
1262             if (opx->offset < -256 || opx->offset > 255) {
1263                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1264                         "byte value exceeds bounds");
1265             }
1266             if (opx->segment != NO_SEG) {
1267                 data = opx->offset;
1268                 out(offset, segment, &data, OUT_ADDRESS, 1,
1269                     opx->segment, opx->wrt);
1270             } else {
1271                 bytes[0] = opx->offset;
1272                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1273                     NO_SEG);
1274             }
1275             offset += 1;
1276             break;
1277
1278         case4(024):
1279             if (opx->offset < 0 || opx->offset > 255)
1280                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1281                         "unsigned byte value exceeds bounds");
1282             if (opx->segment != NO_SEG) {
1283                 data = opx->offset;
1284                 out(offset, segment, &data, OUT_ADDRESS, 1,
1285                     opx->segment, opx->wrt);
1286             } else {
1287                 bytes[0] = opx->offset;
1288                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1289                     NO_SEG);
1290             }
1291             offset += 1;
1292             break;
1293
1294         case4(030):
1295             warn_overflow(2, opx);
1296             data = opx->offset;
1297             out(offset, segment, &data, OUT_ADDRESS, 2,
1298                 opx->segment, opx->wrt);
1299             offset += 2;
1300             break;
1301
1302         case4(034):
1303             if (opx->type & (BITS16 | BITS32))
1304                 size = (opx->type & BITS16) ? 2 : 4;
1305             else
1306                 size = (bits == 16) ? 2 : 4;
1307             warn_overflow(size, opx);
1308             data = opx->offset;
1309             out(offset, segment, &data, OUT_ADDRESS, size,
1310                 opx->segment, opx->wrt);
1311             offset += size;
1312             break;
1313
1314         case4(040):
1315             warn_overflow(4, opx);
1316             data = opx->offset;
1317             out(offset, segment, &data, OUT_ADDRESS, 4,
1318                 opx->segment, opx->wrt);
1319             offset += 4;
1320             break;
1321
1322         case4(044):
1323             data = opx->offset;
1324             size = ins->addr_size >> 3;
1325             warn_overflow(size, opx);
1326             out(offset, segment, &data, OUT_ADDRESS, size,
1327                 opx->segment, opx->wrt);
1328             offset += size;
1329             break;
1330
1331         case4(050):
1332             if (opx->segment != segment)
1333                 errfunc(ERR_NONFATAL,
1334                         "short relative jump outside segment");
1335             data = opx->offset - insn_end;
1336             if (data > 127 || data < -128)
1337                 errfunc(ERR_NONFATAL, "short jump is out of range");
1338             bytes[0] = data;
1339             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1340             offset += 1;
1341             break;
1342
1343         case4(054):
1344             data = (int64_t)opx->offset;
1345             out(offset, segment, &data, OUT_ADDRESS, 8,
1346                 opx->segment, opx->wrt);
1347             offset += 8;
1348             break;
1349
1350         case4(060):
1351             if (opx->segment != segment) {
1352                 data = opx->offset;
1353                 out(offset, segment, &data,
1354                     OUT_REL2ADR, insn_end - offset,
1355                     opx->segment, opx->wrt);
1356             } else {
1357                 data = opx->offset - insn_end;
1358                 out(offset, segment, &data,
1359                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1360             }
1361             offset += 2;
1362             break;
1363
1364         case4(064):
1365             if (opx->type & (BITS16 | BITS32 | BITS64))
1366                 size = (opx->type & BITS16) ? 2 : 4;
1367             else
1368                 size = (bits == 16) ? 2 : 4;
1369             if (opx->segment != segment) {
1370                 data = opx->offset;
1371                 out(offset, segment, &data,
1372                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1373                     insn_end - offset, opx->segment, opx->wrt);
1374             } else {
1375                 data = opx->offset - insn_end;
1376                 out(offset, segment, &data,
1377                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1378             }
1379             offset += size;
1380             break;
1381
1382         case4(070):
1383             if (opx->segment != segment) {
1384                 data = opx->offset;
1385                 out(offset, segment, &data,
1386                     OUT_REL4ADR, insn_end - offset,
1387                     opx->segment, opx->wrt);
1388             } else {
1389                 data = opx->offset - insn_end;
1390                 out(offset, segment, &data,
1391                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1392             }
1393             offset += 4;
1394             break;
1395
1396         case4(074):
1397             if (opx->segment == NO_SEG)
1398                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1399                         " relocatable");
1400             data = 0;
1401             out(offset, segment, &data, OUT_ADDRESS, 2,
1402                 outfmt->segbase(1 + opx->segment),
1403                 opx->wrt);
1404             offset += 2;
1405             break;
1406
1407         case4(0140):
1408             data = opx->offset;
1409             warn_overflow(2, opx);
1410             if (is_sbyte16(opx)) {
1411                 bytes[0] = data;
1412                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1413                     NO_SEG);
1414                 offset++;
1415             } else {
1416                 out(offset, segment, &data, OUT_ADDRESS, 2,
1417                     opx->segment, opx->wrt);
1418                 offset += 2;
1419             }
1420             break;
1421
1422         case4(0144):
1423             EMIT_REX();
1424             bytes[0] = *codes++;
1425             if (is_sbyte16(opx))
1426                 bytes[0] |= 2;  /* s-bit */
1427             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1428             offset++;
1429             break;
1430
1431         case4(0150):
1432             data = opx->offset;
1433             warn_overflow(4, opx);
1434             if (is_sbyte32(opx)) {
1435                 bytes[0] = data;
1436                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1437                     NO_SEG);
1438                 offset++;
1439             } else {
1440                 out(offset, segment, &data, OUT_ADDRESS, 4,
1441                     opx->segment, opx->wrt);
1442                 offset += 4;
1443             }
1444             break;
1445
1446         case4(0154):
1447             EMIT_REX();
1448             bytes[0] = *codes++;
1449             if (is_sbyte32(opx))
1450                 bytes[0] |= 2;  /* s-bit */
1451             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1452             offset++;
1453             break;
1454
1455         case4(0160):
1456         case4(0164):
1457             break;
1458
1459         case 0171:
1460             bytes[0] =
1461                 (ins->drexdst << 4) |
1462                 (ins->rex & REX_OC ? 0x08 : 0) |
1463                 (ins->rex & (REX_R|REX_X|REX_B));
1464             ins->rex = 0;
1465             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1466             offset++;
1467             break;
1468
1469         case 0172:
1470             c = *codes++;
1471             opx = &ins->oprs[c >> 3];
1472             bytes[0] = nasm_regvals[opx->basereg] << 4;
1473             opx = &ins->oprs[c & 7];
1474             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1475                 errfunc(ERR_NONFATAL,
1476                         "non-absolute expression not permitted as argument %d",
1477                         c & 7);
1478             } else {
1479                 if (opx->offset & ~15) {
1480                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1481                             "four-bit argument exceeds bounds");
1482                 }
1483                 bytes[0] |= opx->offset & 15;
1484             }
1485             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1486             offset++;
1487             break;
1488
1489         case 0173:
1490             c = *codes++;
1491             opx = &ins->oprs[c >> 4];
1492             bytes[0] = nasm_regvals[opx->basereg] << 4;
1493             bytes[0] |= c & 15;
1494             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1495             offset++;
1496             break;
1497
1498         case 0174:
1499             c = *codes++;
1500             opx = &ins->oprs[c];
1501             bytes[0] = nasm_regvals[opx->basereg] << 4;
1502             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1503             offset++;
1504             break;
1505
1506         case4(0250):
1507             data = opx->offset;
1508             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1509                 (int32_t)data != (int64_t)data) {
1510                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1511                         "signed dword immediate exceeds bounds");
1512             }
1513             if (is_sbyte32(opx)) {
1514                 bytes[0] = data;
1515                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1516                     NO_SEG);
1517                 offset++;
1518             } else {
1519                 out(offset, segment, &data, OUT_ADDRESS, 4,
1520                     opx->segment, opx->wrt);
1521                 offset += 4;
1522             }
1523             break;
1524
1525         case4(0254):
1526             data = opx->offset;
1527             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1528                 (int32_t)data != (int64_t)data) {
1529                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1530                         "signed dword immediate exceeds bounds");
1531             }
1532             out(offset, segment, &data, OUT_ADDRESS, 4,
1533                 opx->segment, opx->wrt);
1534             offset += 4;
1535             break;
1536
1537         case4(0260):
1538         case 0270:
1539             codes += 2;
1540             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1541                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1542                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1543                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1544                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1545                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1546                 offset += 3;
1547             } else {
1548                 bytes[0] = 0xc5;
1549                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1550                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1551                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1552                 offset += 2;
1553             }
1554             break;
1555
1556         case4(0274):
1557         {
1558             uint64_t uv, um;
1559             int s;
1560
1561             if (ins->rex & REX_W)
1562                 s = 64;
1563             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1564                 s = 16;
1565             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1566                 s = 32;
1567             else
1568                 s = bits;
1569
1570             um = (uint64_t)2 << (s-1);
1571             uv = opx->offset;
1572
1573             if (uv > 127 && uv < (uint64_t)-128 &&
1574                 (uv < um-128 || uv > um-1)) {
1575                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1576                         "signed byte value exceeds bounds");
1577             }
1578             if (opx->segment != NO_SEG) {
1579                 data = uv;
1580                 out(offset, segment, &data, OUT_ADDRESS, 1,
1581                     opx->segment, opx->wrt);
1582             } else {
1583                 bytes[0] = uv;
1584                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1585                     NO_SEG);
1586             }
1587             offset += 1;
1588             break;
1589         }
1590
1591         case4(0300):
1592             break;
1593
1594         case 0310:
1595             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1596                 *bytes = 0x67;
1597                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1598                 offset += 1;
1599             } else
1600                 offset += 0;
1601             break;
1602
1603         case 0311:
1604             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1605                 *bytes = 0x67;
1606                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1607                 offset += 1;
1608             } else
1609                 offset += 0;
1610             break;
1611
1612         case 0312:
1613             break;
1614
1615         case 0313:
1616             ins->rex = 0;
1617             break;
1618
1619         case4(0314):
1620             break;
1621
1622         case 0320:
1623             if (bits != 16) {
1624                 *bytes = 0x66;
1625                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1626                 offset += 1;
1627             } else
1628                 offset += 0;
1629             break;
1630
1631         case 0321:
1632             if (bits == 16) {
1633                 *bytes = 0x66;
1634                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1635                 offset += 1;
1636             } else
1637                 offset += 0;
1638             break;
1639
1640         case 0322:
1641         case 0323:
1642             break;
1643
1644         case 0324:
1645             ins->rex |= REX_W;
1646             break;
1647
1648         case 0330:
1649             *bytes = *codes++ ^ condval[ins->condition];
1650             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1651             offset += 1;
1652             break;
1653
1654         case 0331:
1655             break;
1656
1657         case 0332:
1658         case 0333:
1659             *bytes = c - 0332 + 0xF2;
1660             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1661             offset += 1;
1662             break;
1663
1664         case 0334:
1665             if (ins->rex & REX_R) {
1666                 *bytes = 0xF0;
1667                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1668                 offset += 1;
1669             }
1670             ins->rex &= ~(REX_L|REX_R);
1671             break;
1672
1673         case 0335:
1674             break;
1675
1676         case 0336:
1677         case 0337:
1678             break;
1679
1680         case 0340:
1681             if (ins->oprs[0].segment != NO_SEG)
1682                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1683             else {
1684                 int64_t size = ins->oprs[0].offset;
1685                 if (size > 0)
1686                     out(offset, segment, NULL,
1687                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1688                 offset += size;
1689             }
1690             break;
1691
1692         case 0341:
1693             break;
1694
1695         case 0344:
1696         case 0345:
1697             bytes[0] = c & 1;
1698             switch (ins->oprs[0].basereg) {
1699             case R_CS:
1700                 bytes[0] += 0x0E;
1701                 break;
1702             case R_DS:
1703                 bytes[0] += 0x1E;
1704                 break;
1705             case R_ES:
1706                 bytes[0] += 0x06;
1707                 break;
1708             case R_SS:
1709                 bytes[0] += 0x16;
1710                 break;
1711             default:
1712                 errfunc(ERR_PANIC,
1713                         "bizarre 8086 segment register received");
1714             }
1715             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1716             offset++;
1717             break;
1718
1719         case 0346:
1720         case 0347:
1721             bytes[0] = c & 1;
1722             switch (ins->oprs[0].basereg) {
1723             case R_FS:
1724                 bytes[0] += 0xA0;
1725                 break;
1726             case R_GS:
1727                 bytes[0] += 0xA8;
1728                 break;
1729             default:
1730                 errfunc(ERR_PANIC,
1731                         "bizarre 386 segment register received");
1732             }
1733             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1734             offset++;
1735             break;
1736
1737         case 0360:
1738             break;
1739
1740         case 0361:
1741             bytes[0] = 0x66;
1742             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1743             offset += 1;
1744             break;
1745
1746         case 0362:
1747         case 0363:
1748             bytes[0] = c - 0362 + 0xf2;
1749             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1750             offset += 1;
1751             break;
1752
1753         case 0364:
1754         case 0365:
1755             break;
1756
1757         case 0366:
1758         case 0367:
1759             *bytes = c - 0366 + 0x66;
1760             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1761             offset += 1;
1762             break;
1763
1764         case 0370:
1765         case 0371:
1766         case 0372:
1767             break;
1768
1769         case 0373:
1770             *bytes = bits == 16 ? 3 : 5;
1771             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1772             offset += 1;
1773             break;
1774
1775         case4(0100):
1776         case4(0110):
1777         case4(0120):
1778         case4(0130):
1779         case4(0200):
1780         case4(0204):
1781         case4(0210):
1782         case4(0214):
1783         case4(0220):
1784         case4(0224):
1785         case4(0230):
1786         case4(0234):
1787             {
1788                 ea ea_data;
1789                 int rfield;
1790                 int32_t rflags;
1791                 uint8_t *p;
1792                 int32_t s;
1793                 enum out_type type;
1794                 struct operand *opy = &ins->oprs[op2];
1795
1796                 if (c <= 0177) {
1797                     /* pick rfield from operand b (opx) */
1798                     rflags = regflag(opx);
1799                     rfield = nasm_regvals[opx->basereg];
1800                 } else {
1801                     /* rfield is constant */
1802                     rflags = 0;
1803                     rfield = c & 7;
1804                 }
1805
1806                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1807                                 rfield, rflags)) {
1808                     errfunc(ERR_NONFATAL, "invalid effective address");
1809                 }
1810
1811
1812                 p = bytes;
1813                 *p++ = ea_data.modrm;
1814                 if (ea_data.sib_present)
1815                     *p++ = ea_data.sib;
1816
1817                 /* DREX suffixes come between the SIB and the displacement */
1818                 if (ins->rex & REX_D) {
1819                     *p++ = (ins->drexdst << 4) |
1820                            (ins->rex & REX_OC ? 0x08 : 0) |
1821                            (ins->rex & (REX_R|REX_X|REX_B));
1822                     ins->rex = 0;
1823                 }
1824
1825                 s = p - bytes;
1826                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1827
1828                 /*
1829                  * Make sure the address gets the right offset in case
1830                  * the line breaks in the .lst file (BR 1197827)
1831                  */
1832                 offset += s;
1833                 s = 0;
1834
1835                 switch (ea_data.bytes) {
1836                 case 0:
1837                     break;
1838                 case 1:
1839                 case 2:
1840                 case 4:
1841                 case 8:
1842                     data = opy->offset;
1843                     warn_overflow(ea_data.bytes, opy);
1844                     s += ea_data.bytes;
1845                     if (ea_data.rip) {
1846                         if (opy->segment == segment) {
1847                             data -= insn_end;
1848                             out(offset, segment, &data, OUT_ADDRESS,
1849                                 ea_data.bytes, NO_SEG, NO_SEG);
1850                         } else {
1851                             out(offset, segment, &data, OUT_REL4ADR,
1852                                 insn_end - offset, opy->segment, opy->wrt);
1853                         }
1854                     } else {
1855                         type = OUT_ADDRESS;
1856                         out(offset, segment, &data, OUT_ADDRESS,
1857                             ea_data.bytes, opy->segment, opy->wrt);
1858                     }
1859                     break;
1860                 default:
1861                     /* Impossible! */
1862                     errfunc(ERR_PANIC,
1863                             "Invalid amount of bytes (%d) for offset?!",
1864                             ea_data.bytes);
1865                     break;
1866                 }
1867                 offset += s;
1868             }
1869             break;
1870
1871         default:
1872             errfunc(ERR_PANIC, "internal instruction table corrupt"
1873                     ": instruction code \\%o (0x%02X) given", c, c);
1874             break;
1875         }
1876     }
1877 }
1878
1879 static int32_t regflag(const operand * o)
1880 {
1881     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1882         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1883     }
1884     return nasm_reg_flags[o->basereg];
1885 }
1886
1887 static int32_t regval(const operand * o)
1888 {
1889     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1890         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1891     }
1892     return nasm_regvals[o->basereg];
1893 }
1894
1895 static int op_rexflags(const operand * o, int mask)
1896 {
1897     int32_t flags;
1898     int val;
1899
1900     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1901         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1902     }
1903
1904     flags = nasm_reg_flags[o->basereg];
1905     val = nasm_regvals[o->basereg];
1906
1907     return rexflags(val, flags, mask);
1908 }
1909
1910 static int rexflags(int val, int32_t flags, int mask)
1911 {
1912     int rex = 0;
1913
1914     if (val >= 8)
1915         rex |= REX_B|REX_X|REX_R;
1916     if (flags & BITS64)
1917         rex |= REX_W;
1918     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1919         rex |= REX_H;
1920     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1921         rex |= REX_P;
1922
1923     return rex & mask;
1924 }
1925
1926 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1927 {
1928     int i, size[MAX_OPERANDS], asize, oprs, ret;
1929
1930     ret = 100;
1931
1932     /*
1933      * Check the opcode
1934      */
1935     if (itemp->opcode != instruction->opcode)
1936         return 0;
1937
1938     /*
1939      * Count the operands
1940      */
1941     if (itemp->operands != instruction->operands)
1942         return 0;
1943
1944     /*
1945      * Check that no spurious colons or TOs are present
1946      */
1947     for (i = 0; i < itemp->operands; i++)
1948         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1949             return 0;
1950
1951     /*
1952      * Process size flags
1953      */
1954     if (itemp->flags & IF_ARMASK) {
1955         memset(size, 0, sizeof size);
1956
1957         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
1958
1959         switch (itemp->flags & IF_SMASK) {
1960         case IF_SB:
1961             size[i] = BITS8;
1962             break;
1963         case IF_SW:
1964             size[i] = BITS16;
1965             break;
1966         case IF_SD:
1967             size[i] = BITS32;
1968             break;
1969         case IF_SQ:
1970             size[i] = BITS64;
1971             break;
1972         case IF_SO:
1973             size[i] = BITS128;
1974             break;
1975         case IF_SY:
1976             size[i] = BITS256;
1977             break;
1978         case IF_SZ:
1979             switch (bits) {
1980             case 16:
1981                 size[i] = BITS16;
1982                 break;
1983             case 32:
1984                 size[i] = BITS32;
1985                 break;
1986             case 64:
1987                 size[i] = BITS64;
1988                 break;
1989             }
1990             break;
1991         default:
1992             break;
1993         }
1994     } else {
1995         asize = 0;
1996         switch (itemp->flags & IF_SMASK) {
1997         case IF_SB:
1998             asize = BITS8;
1999             break;
2000         case IF_SW:
2001             asize = BITS16;
2002             break;
2003         case IF_SD:
2004             asize = BITS32;
2005             break;
2006         case IF_SQ:
2007             asize = BITS64;
2008             break;
2009         case IF_SO:
2010             asize = BITS128;
2011             break;
2012         case IF_SY:
2013             asize = BITS256;
2014             break;
2015         case IF_SZ:
2016             switch (bits) {
2017             case 16:
2018                 asize = BITS16;
2019                 break;
2020             case 32:
2021                 asize = BITS32;
2022                 break;
2023             case 64:
2024                 asize = BITS64;
2025                 break;
2026             }
2027             break;
2028         default:
2029             break;
2030         }
2031         for (i = 0; i < MAX_OPERANDS; i++)
2032             size[i] = asize;
2033     }
2034
2035     /*
2036      * Check that the operand flags all match up
2037      */
2038     for (i = 0; i < itemp->operands; i++) {
2039         int32_t type = instruction->oprs[i].type;
2040         if (!(type & SIZE_MASK))
2041             type |= size[i];
2042
2043         if (itemp->opd[i] & SAME_AS) {
2044             int j = itemp->opd[i] & ~SAME_AS;
2045             if (type != instruction->oprs[j].type ||
2046                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2047                 return 0;
2048         } else if (itemp->opd[i] & ~type ||
2049             ((itemp->opd[i] & SIZE_MASK) &&
2050              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2051             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2052                 (type & SIZE_MASK))
2053                 return 0;
2054             else
2055                 return 1;
2056         }
2057     }
2058
2059     /*
2060      * Check operand sizes
2061      */
2062     if (itemp->flags & (IF_SM | IF_SM2)) {
2063         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2064         asize = 0;
2065         for (i = 0; i < oprs; i++) {
2066             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2067                 int j;
2068                 for (j = 0; j < oprs; j++)
2069                     size[j] = asize;
2070                 break;
2071             }
2072         }
2073     } else {
2074         oprs = itemp->operands;
2075     }
2076
2077     for (i = 0; i < itemp->operands; i++) {
2078         if (!(itemp->opd[i] & SIZE_MASK) &&
2079             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2080             return 2;
2081     }
2082
2083     /*
2084      * Check template is okay at the set cpu level
2085      */
2086     if (((itemp->flags & IF_PLEVEL) > cpu))
2087         return 3;
2088
2089     /*
2090      * Verify the appropriate long mode flag.
2091      */
2092     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2093         return 4;
2094
2095     /*
2096      * Check if special handling needed for Jumps
2097      */
2098     if ((uint8_t)(itemp->code[0]) >= 0370)
2099         return 99;
2100
2101     return ret;
2102 }
2103
/*
 * process_ea: work out the ModRM byte, the optional SIB byte and the
 * displacement length needed to encode operand `input' as an effective
 * address.
 *
 *   input    - operand to encode: a register (register-direct form) or a
 *              memory reference
 *   output   - receives modrm, sib_present, sib, bytes (displacement
 *              length), rip and size.  REX bits are OR-ed into
 *              output->rex, so whatever the caller left in that field
 *              is kept.
 *   bits     - assembly mode; only compared against 64 here
 *   addrbits - address size in effect (16, 32 or 64)
 *   rfield   - value for the ModRM reg field; only its low three bits
 *              are stored in modrm, the full value goes to rexflags()
 *   rflags   - register flags that go with rfield, passed to rexflags()
 *
 * Returns `output' on success, or NULL when the operand cannot be
 * encoded as an effective address.
 */
2104 static ea *process_ea(operand * input, ea * output, int bits,
2105                       int addrbits, int rfield, int32_t rflags)
2106 {
         /* Operand flagged OPFLAG_UNKNOWN: the zero- and byte-displacement
            forms below are then never picked automatically */
2107     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2108
2109     output->rip = false;
2110
2111     /* REX flags for the rfield operand */
2112     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2113
2114     if (!(REGISTER & ~input->type)) {   /* register direct */
2115         int i;
2116         int32_t f;
2117
2118         if (input->basereg < EXPR_REG_START /* Verify as Register */
2119             || input->basereg >= REG_ENUM_LIMIT)
2120             return NULL;
2121         f = regflag(input);
2122         i = nasm_regvals[input->basereg];
2123
2124         if (REG_EA & ~f)
2125             return NULL;        /* Invalid EA register */
2126
2127         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2128
2129         output->sib_present = false;             /* no SIB necessary */
2130         output->bytes = 0;  /* no offset necessary either */
2131         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2132     } else {                    /* it's a memory reference */
2133         if (input->basereg == -1
2134             && (input->indexreg == -1 || input->scale == 0)) {
2135             /* it's a pure offset */
2136             if (bits == 64 && (~input->type & IP_REL)) {
                   /* 64-bit mode without IP_REL: emit ModRM + SIB with a
                      4-byte displacement and rip left false */
2137               int scale, index, base;
2138               output->sib_present = true;
2139               scale = 0;
2140               index = 4;
2141               base = 5;
2142               output->sib = (scale << 6) | (index << 3) | base;
2143               output->bytes = 4;
2144               output->modrm = 4 | ((rfield & 7) << 3);
2145               output->rip = false;
2146             } else {
                   /* displacement-only form; flagged as RIP-relative
                      whenever bits == 64 */
2147               output->sib_present = false;
2148               output->bytes = (addrbits != 16 ? 4 : 2);
2149               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2150               output->rip = bits == 64;
2151             }
2152         } else {                /* it's an indirection */
2153             int i = input->indexreg, b = input->basereg, s = input->scale;
2154             int32_t o = input->offset, seg = input->segment;
2155             int hb = input->hintbase, ht = input->hinttype;
2156             int t;
2157             int it, bt;
2158             int32_t ix, bx;     /* register flags */
2159
2160             if (s == 0)
2161                 i = -1;         /* make this easy, at least */
2162
                 /* it/bt: register numbers of index and base, or -1 when
                    absent; ix/bx: the matching register flags, or 0 */
2163             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2164                 it = nasm_regvals[i];
2165                 ix = nasm_reg_flags[i];
2166             } else {
2167                 it = -1;
2168                 ix = 0;
2169             }
2170
2171             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2172                 bt = nasm_regvals[b];
2173                 bx = nasm_reg_flags[b];
2174             } else {
2175                 bt = -1;
2176                 bx = 0;
2177             }
2178
2179             /* check for a 32/64-bit memory reference... */
2180             if ((ix|bx) & (BITS32|BITS64)) {
2181                 /* it must be a 32/64-bit memory reference. Firstly we have
2182                  * to check that all registers involved are type E/Rxx. */
2183                 int32_t sok = BITS32|BITS64;
2184
2185                 if (it != -1) {
2186                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2187                         sok &= ix;
2188                     else
2189                         return NULL;
2190                 }
2191
2192                 if (bt != -1) {
2193                     if (REG_GPR & ~bx)
2194                         return NULL; /* Invalid register */
2195                     if (~sok & bx & SIZE_MASK)
2196                         return NULL; /* Invalid size */
2197                     sok &= bx;
2198                 }
2199
2200                 /* While we're here, ensure the user didn't specify
2201                    WORD or QWORD. */
2202                 if (input->disp_size == 16 || input->disp_size == 64)
2203                     return NULL;
2204
                     /* the register sizes collected in sok must be
                        compatible with the requested address size */
2205                 if (addrbits == 16 ||
2206                     (addrbits == 32 && !(sok & BITS32)) ||
2207                     (addrbits == 64 && !(sok & BITS64)))
2208                     return NULL;
2209
2210                 /* now reorganize base/index */
2211                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2212                     ((hb == b && ht == EAH_NOTBASE)
2213                      || (hb == i && ht == EAH_MAKEBASE))) {
2214                     /* swap if hints say so */
2215                     t = bt, bt = it, it = t;
2216                     t = bx, bx = ix, ix = t;
2217                 }
2218                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2219                     bt = -1, bx = 0, s++;
2220                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2221                     /* make single reg base, unless hint */
2222                     bt = it, bx = ix, it = -1, ix = 0;
2223                 }
2224                 if (((s == 2 && it != REG_NUM_ESP
2225                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2226                      || s == 5 || s == 9) && bt == -1)
2227                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2228                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2229                     && (input->eaflags & EAF_TIMESTWO))
2230                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2231                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2232                 if (s == 1 && it == REG_NUM_ESP) {
2233                     /* swap ESP into base if scale is 1 */
2234                     t = it, it = bt, bt = t;
2235                     t = ix, ix = bx, bx = t;
2236                 }
2237                 if (it == REG_NUM_ESP
2238                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2239                     return NULL;        /* wrong, for various reasons */
2240
2241                 output->rex |= rexflags(it, ix, REX_X);
2242                 output->rex |= rexflags(bt, bx, REX_B);
2243
2244                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2245                     /* no SIB needed */
2246                     int mod, rm;
2247
2248                     if (bt == -1) {
2249                         rm = 5;
2250                         mod = 0;
2251                     } else {
2252                         rm = (bt & 7);
2253                         if (rm != REG_NUM_EBP && o == 0 &&
2254                                 seg == NO_SEG && !forw_ref &&
2255                                 !(input->eaflags &
2256                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2257                             mod = 0;
2258                         else if (input->eaflags & EAF_BYTEOFFS ||
2259                                  (o >= -128 && o <= 127 && seg == NO_SEG
2260                                   && !forw_ref
2261                                   && !(input->eaflags & EAF_WORDOFFS)))
2262                             mod = 1;
2263                         else
2264                             mod = 2;
2265                     }
2266
2267                     output->sib_present = false;
                         /* no base, or mod 2: four displacement bytes;
                            otherwise mod (0 or 1) is the byte count */
2268                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2269                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2270                 } else {
2271                     /* we need a SIB */
2272                     int mod, scale, index, base;
2273
2274                     if (it == -1)
2275                         index = 4, s = 1;
2276                     else
2277                         index = (it & 7);
2278
                         /* map the scale factor to the 2-bit field stored
                            in the top of the SIB byte */
2279                     switch (s) {
2280                     case 1:
2281                         scale = 0;
2282                         break;
2283                     case 2:
2284                         scale = 1;
2285                         break;
2286                     case 4:
2287                         scale = 2;
2288                         break;
2289                     case 8:
2290                         scale = 3;
2291                         break;
2292                     default:   /* then what the smeg is it? */
2293                         return NULL;    /* panic */
2294                     }
2295
2296                     if (bt == -1) {
2297                         base = 5;
2298                         mod = 0;
2299                     } else {
2300                         base = (bt & 7);
2301                         if (base != REG_NUM_EBP && o == 0 &&
2302                                     seg == NO_SEG && !forw_ref &&
2303                                     !(input->eaflags &
2304                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2305                             mod = 0;
2306                         else if (input->eaflags & EAF_BYTEOFFS ||
2307                                  (o >= -128 && o <= 127 && seg == NO_SEG
2308                                   && !forw_ref
2309                                   && !(input->eaflags & EAF_WORDOFFS)))
2310                             mod = 1;
2311                         else
2312                             mod = 2;
2313                     }
2314
2315                     output->sib_present = true;
2316                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2317                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2318                     output->sib = (scale << 6) | (index << 3) | base;
2319                 }
2320             } else {            /* it's 16-bit */
2321                 int mod, rm;
2322
2323                 /* check for 64-bit long mode */
2324                 if (addrbits == 64)
2325                     return NULL;
2326
2327                 /* check all registers are BX, BP, SI or DI */
2328                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2329                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2330                                        && i != R_SI && i != R_DI))
2331                     return NULL;
2332
2333                 /* ensure the user didn't specify DWORD/QWORD */
2334                 if (input->disp_size == 32 || input->disp_size == 64)
2335                     return NULL;
2336
2337                 if (s != 1 && i != -1)
2338                     return NULL;        /* no can do, in 16-bit EA */
2339                 if (b == -1 && i != -1) {
2340                     int tmp = b;
2341                     b = i;
2342                     i = tmp;
2343                 }               /* swap */
2344                 if ((b == R_SI || b == R_DI) && i != -1) {
2345                     int tmp = b;
2346                     b = i;
2347                     i = tmp;
2348                 }
2349                 /* have BX/BP as base, SI/DI index */
2350                 if (b == i)
2351                     return NULL;        /* shouldn't ever happen, in theory */
2352                 if (i != -1 && b != -1 &&
2353                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2354                     return NULL;        /* invalid combinations */
2355                 if (b == -1)    /* pure offset: handled above */
2356                     return NULL;        /* so if it gets to here, panic! */
2357
2358                 rm = -1;
                     /* with an index, key the lookup on the index/base
                        pair; otherwise on the base register alone */
2359                 if (i != -1)
2360                     switch (i * 256 + b) {
2361                     case R_SI * 256 + R_BX:
2362                         rm = 0;
2363                         break;
2364                     case R_DI * 256 + R_BX:
2365                         rm = 1;
2366                         break;
2367                     case R_SI * 256 + R_BP:
2368                         rm = 2;
2369                         break;
2370                     case R_DI * 256 + R_BP:
2371                         rm = 3;
2372                         break;
2373                 } else
2374                     switch (b) {
2375                     case R_SI:
2376                         rm = 4;
2377                         break;
2378                     case R_DI:
2379                         rm = 5;
2380                         break;
2381                     case R_BP:
2382                         rm = 6;
2383                         break;
2384                     case R_BX:
2385                         rm = 7;
2386                         break;
2387                     }
2388                 if (rm == -1)   /* can't happen, in theory */
2389                     return NULL;        /* so panic if it does */
2390
                     /* rm 6 (the lone-BP form) is never given mod 0 */
2391                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2392                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2393                     mod = 0;
2394                 else if (input->eaflags & EAF_BYTEOFFS ||
2395                          (o >= -128 && o <= 127 && seg == NO_SEG
2396                           && !forw_ref
2397                           && !(input->eaflags & EAF_WORDOFFS)))
2398                     mod = 1;
2399                 else
2400                     mod = 2;
2401
2402                 output->sib_present = false;    /* no SIB - it's 16-bit */
2403                 output->bytes = mod;    /* bytes of offset needed */
2404                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2405             }
2406         }
2407     }
2408
         /* total length: ModRM byte + optional SIB byte + displacement */
2409     output->size = 1 + output->sib_present + output->bytes;
2410     return output;
2411 }
2412
2413 static void add_asp(insn *ins, int addrbits)
2414 {
2415     int j, valid;
2416     int defdisp;
2417
2418     valid = (addrbits == 64) ? 64|32 : 32|16;
2419
2420     switch (ins->prefixes[PPS_ASIZE]) {
2421     case P_A16:
2422         valid &= 16;
2423         break;
2424     case P_A32:
2425         valid &= 32;
2426         break;
2427     case P_A64:
2428         valid &= 64;
2429         break;
2430     case P_ASP:
2431         valid &= (addrbits == 32) ? 16 : 32;
2432         break;
2433     default:
2434         break;
2435     }
2436
2437     for (j = 0; j < ins->operands; j++) {
2438         if (!(MEMORY & ~ins->oprs[j].type)) {
2439             int32_t i, b;
2440
2441             /* Verify as Register */
2442             if (ins->oprs[j].indexreg < EXPR_REG_START
2443                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2444                 i = 0;
2445             else
2446                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2447
2448             /* Verify as Register */
2449             if (ins->oprs[j].basereg < EXPR_REG_START
2450                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2451                 b = 0;
2452             else
2453                 b = nasm_reg_flags[ins->oprs[j].basereg];
2454
2455             if (ins->oprs[j].scale == 0)
2456                 i = 0;
2457
2458             if (!i && !b) {
2459                 int ds = ins->oprs[j].disp_size;
2460                 if ((addrbits != 64 && ds > 8) ||
2461                     (addrbits == 64 && ds == 16))
2462                     valid &= ds;
2463             } else {
2464                 if (!(REG16 & ~b))
2465                     valid &= 16;
2466                 if (!(REG32 & ~b))
2467                     valid &= 32;
2468                 if (!(REG64 & ~b))
2469                     valid &= 64;
2470
2471                 if (!(REG16 & ~i))
2472                     valid &= 16;
2473                 if (!(REG32 & ~i))
2474                     valid &= 32;
2475                 if (!(REG64 & ~i))
2476                     valid &= 64;
2477             }
2478         }
2479     }
2480
2481     if (valid & addrbits) {
2482         ins->addr_size = addrbits;
2483     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2484         /* Add an address size prefix */
2485         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2486         ins->prefixes[PPS_ASIZE] = pref;
2487         ins->addr_size = (addrbits == 32) ? 16 : 32;
2488     } else {
2489         /* Impossible... */
2490         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2491         ins->addr_size = addrbits; /* Error recovery */
2492     }
2493
2494     defdisp = ins->addr_size == 16 ? 16 : 32;
2495
2496     for (j = 0; j < ins->operands; j++) {
2497         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2498             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2499             != ins->addr_size) {
2500             /* mem_offs sizes must match the address size; if not,
2501                strip the MEM_OFFS bit and match only EA instructions */
2502             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2503         }
2504     }
2505 }