assemble.c

   1 /* assemble.c   code generation for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the license given in the file "LICENSE"
   6  * distributed in the NASM archive.
   7  *
   8  * the actual codes (C syntax, i.e. octal):
   9  * \0            - terminates the code. (Unless it's a literal of course.)
  10  * \1..\4        - that many literal bytes follow in the code stream
  11  * \5            - add 4 to the primary operand number (b, low octdigit)
  12  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  13  * \7            - add 4 to both the primary and the secondary operand number
  14  * \10..\13      - a literal byte follows in the code stream, to be added
  15  *                 to the register value of operand 0..3
  16  * \14..\17      - a signed byte immediate operand, from operand 0..3
  17  * \20..\23      - a byte immediate operand, from operand 0..3
  18  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  19  * \30..\33      - a word immediate operand, from operand 0..3
  20  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  21  *                 assembly mode or the operand-size override on the operand
  22  * \40..\43      - a long immediate operand, from operand 0..3
  23  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  24  *                 depending on the address size of the instruction.
  25  * \50..\53      - a byte relative operand, from operand 0..3
  26  * \54..\57      - a qword immediate operand, from operand 0..3
  27  * \60..\63      - a word relative operand, from operand 0..3
  28  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  29  *                 assembly mode or the operand-size override on the operand
  30  * \70..\73      - a long relative operand, from operand 0..3
  31  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  32  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  33  *                 field the register value of operand b.
  34  * \140..\143    - an immediate word or signed byte for operand 0..3
  35  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  36  *                  is a signed byte rather than a word.  Opcode byte follows.
  37  * \150..\153    - an immediate dword or signed byte for operand 0..3
  38  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  39  *                  is a signed byte rather than a dword.  Opcode byte follows.
  40  * \160..\163    - this instruction uses DREX rather than REX, with the
  41  *                 OC0 field set to 0, and the dest field taken from
  42  *                 operand 0..3.
  43  * \164..\167    - this instruction uses DREX rather than REX, with the
  44  *                 OC0 field set to 1, and the dest field taken from
  45  *                 operand 0..3.
  46  * \171          - placement of DREX suffix in the absence of an EA
  47  * \172\ab       - the register number from operand a in bits 7..4, with
  48  *                 the 4-bit immediate from operand b in bits 3..0.
  49  * \173\xab      - the register number from operand a in bits 7..4, with
  50  *                 the value b in bits 3..0.
  51  * \174\a        - the register number from operand a in bits 7..4, and
  52  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  53  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  54  *                 field equal to digit b.
  55  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  56  *                 is not equal to the truncated and sign-extended 32-bit
  57  *                 operand; used for 32-bit immediates in 64-bit mode.
  58  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  59  * \260..\263    - this instruction uses VEX rather than REX, with the
  60  *                 V field taken from operand 0..3.
  61  * \270          - this instruction uses VEX rather than REX, with the
  62  *                 V field set to 1111b.
  63  *
  64  * VEX prefixes are followed by the sequence:
  65  * \mm\wlp         where mm is the M field; and wlp is:
  66  *                 00 0ww lpp
  67  *                 [w0] ww = 0 for W = 0
  68  *                 [w1] ww = 1 for W = 1
  69  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  70  *                 [ww] ww = 3 for W used as REX.W
  71  *
  72  *
  73  * \274..\277    - a signed byte immediate operand, from operand 0..3,
  74  *                 which is to be extended to the operand size.
  75  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  76  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  77  * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
  78  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  79  * \314          - (disassembler only) invalid with REX.B
  80  * \315          - (disassembler only) invalid with REX.X
  81  * \316          - (disassembler only) invalid with REX.R
  82  * \317          - (disassembler only) invalid with REX.W
  83  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
  84  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
  85  * \322          - indicates that this instruction is only valid when the
  86  *                 operand size is the default (instruction to disassembler,
  87  *                 generates no code in the assembler)
  88  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
  89  * \324          - indicates 64-bit operand size requiring REX prefix.
  90  * \330          - a literal byte follows in the code stream, to be added
  91  *                 to the condition code value of the instruction.
  92  * \331          - instruction not valid with REP prefix.  Hint for
  93  *                 disassembler only; for SSE instructions.
  94  * \332          - REP prefix (0xF2 byte) used as opcode extension.
  95  * \333          - REP prefix (0xF3 byte) used as opcode extension.
  96  * \334          - LOCK prefix used instead of REX.R
  97  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
   98  * \336          - force a REP(E) prefix (0xF3) even if not specified.
   99  * \337          - force a REPNE prefix (0xF2) even if not specified.
 100  *                 \336-\337 are still listed as prefixes in the disassembler.
 101  * \340          - reserve <operand 0> bytes of uninitialized storage.
 102  *                 Operand 0 had better be a segmentless constant.
 103  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 104  *                 (POP is never used for CS) depending on operand 0
 105  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 106  *                 on operand 0
 107  * \360          - no SSE prefix (== \364\331)
 108  * \361          - 66 SSE prefix (== \366\331)
 109  * \362          - F2 SSE prefix (== \364\332)
 110  * \363          - F3 SSE prefix (== \364\333)
 111  * \364          - operand-size prefix (0x66) not permitted
 112  * \365          - address-size prefix (0x67) not permitted
 113  * \366          - operand-size prefix (0x66) used as opcode extension
 114  * \367          - address-size prefix (0x67) used as opcode extension
 115  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 116  *                 370 is used for Jcc, 371 is used for JMP.
 117  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 118  *                 used for conditional jump over longer jump
 119  */
 120
 121 #include "compiler.h"
 122
 123 #include <stdio.h>
 124 #include <string.h>
 125 #include <inttypes.h>
 126
 127 #include "nasm.h"
 128 #include "nasmlib.h"
 129 #include "assemble.h"
 130 #include "insns.h"
 131 #include "tables.h"
 132
 133 /* Initialized to zero by the C standard */
 134 static const uint8_t const_zero_buf[256];
 135
 136 typedef struct {
 137     int sib_present;                 /* is a SIB byte necessary? */
 138     int bytes;                       /* # of bytes of offset needed */
 139     int size;                        /* lazy - this is sib+bytes+1 */
 140     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 141 } ea;
 142
 143 static uint32_t cpu;            /* cpu level received from nasm.c */
 144 static efunc errfunc;
 145 static struct ofmt *outfmt;
 146 static ListGen *list;
 147
 148 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 149 static void gencode(int32_t segment, int64_t offset, int bits,
 150                     insn * ins, const struct itemplate *temp,
 151                     int64_t insn_end);
 152 static int matches(const struct itemplate *, insn *, int bits);
 153 static int32_t regflag(const operand *);
 154 static int32_t regval(const operand *);
 155 static int rexflags(int, int32_t, int);
 156 static int op_rexflags(const operand *, int);
 157 static ea *process_ea(operand *, ea *, int, int, int, int32_t);
 158 static void add_asp(insn *, int);
 159
 160 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 161 {
 162     return ins->prefixes[pos] == prefix;
 163 }
 164
 165 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 166 {
 167     if (ins->prefixes[pos])
 168         errfunc(ERR_NONFATAL, "invalid %s prefix",
 169                 prefix_name(ins->prefixes[pos]));
 170 }
 171
 172 static const char *size_name(int size)
 173 {
 174     switch (size) {
 175     case 1:
 176         return "byte";
 177     case 2:
 178         return "word";
 179     case 4:
 180         return "dword";
 181     case 8:
 182         return "qword";
 183     case 10:
 184         return "tword";
 185     case 16:
 186         return "oword";
 187     case 32:
 188         return "yword";
 189     default:
 190         return "???";
 191     }
 192 }
 193
 194 static void warn_overflow(int size, const struct operand *o)
 195 {
 196     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 197         int64_t lim = ((int64_t)1 << (size*8))-1;
 198         int64_t data = o->offset;
 199
 200         if (data < ~lim || data > lim)
 201             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 202                     "%s data exceeds bounds", size_name(size));
 203     }
 204 }
 205 /*
 206  * This routine wrappers the real output format's output routine,
 207  * in order to pass a copy of the data off to the listing file
 208  * generator at the same time.
 209  */
 210 static void out(int64_t offset, int32_t segto, const void *data,
 211                 enum out_type type, uint64_t size,
 212                 int32_t segment, int32_t wrt)
 213 {
 214     static int32_t lineno = 0;     /* static!!! */
 215     static char *lnfname = NULL;
 216     uint8_t p[8];
 217
 218     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 219         /*
 220          * This is a non-relocated address, and we're going to
 221          * convert it into RAWDATA format.
 222          */
 223         uint8_t *q = p;
 224
 225         if (size > 8) {
 226             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 227             return;
 228         }
 229
 230         WRITEADDR(q, *(int64_t *)data, size);
 231         data = p;
 232         type = OUT_RAWDATA;
 233     }
 234
 235     list->output(offset, data, type, size);
 236
 237     /*
 238      * this call to src_get determines when we call the
 239      * debug-format-specific "linenum" function
 240      * it updates lineno and lnfname to the current values
 241      * returning 0 if "same as last time", -2 if lnfname
 242      * changed, and the amount by which lineno changed,
 243      * if it did. thus, these variables must be static
 244      */
 245
 246     if (src_get(&lineno, &lnfname)) {
 247         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 248     }
 249
 250     outfmt->output(segto, data, type, size, segment, wrt);
 251 }
 252
 253 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 254                      insn * ins, const uint8_t *code)
 255 {
 256     int64_t isize;
 257     uint8_t c = code[0];
 258
 259     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 260         return false;
 261     if (!optimizing)
 262         return false;
 263     if (optimizing < 0 && c == 0371)
 264         return false;
 265
 266     isize = calcsize(segment, offset, bits, ins, code);
 267     if (ins->oprs[0].segment != segment)
 268         return false;
 269
 270     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 271     return (isize >= -128 && isize <= 127); /* is it byte size? */
 272 }
 273
 274 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 275               insn * instruction, struct ofmt *output, efunc error,
 276               ListGen * listgen)
 277 {
 278     const struct itemplate *temp;
 279     int j;
 280     int size_prob;
 281     int64_t insn_end;
 282     int32_t itimes;
 283     int64_t start = offset;
 284     int64_t wsize = 0;             /* size for DB etc. */
 285
 286     errfunc = error;            /* to pass to other functions */
 287     cpu = cp;
 288     outfmt = output;            /* likewise */
 289     list = listgen;             /* and again */
 290
 291     switch (instruction->opcode) {
 292     case -1:
 293         return 0;
 294     case I_DB:
 295         wsize = 1;
 296         break;
 297     case I_DW:
 298         wsize = 2;
 299         break;
 300     case I_DD:
 301         wsize = 4;
 302         break;
 303     case I_DQ:
 304         wsize = 8;
 305         break;
 306     case I_DT:
 307         wsize = 10;
 308         break;
 309     case I_DO:
 310         wsize = 16;
 311         break;
 312     case I_DY:
 313         wsize = 32;
 314         break;
 315     default:
 316         break;
 317     }
 318
 319     if (wsize) {
 320         extop *e;
 321         int32_t t = instruction->times;
 322         if (t < 0)
 323             errfunc(ERR_PANIC,
 324                     "instruction->times < 0 (%ld) in assemble()", t);
 325
 326         while (t--) {           /* repeat TIMES times */
 327             for (e = instruction->eops; e; e = e->next) {
 328                 if (e->type == EOT_DB_NUMBER) {
 329                     if (wsize == 1) {
 330                         if (e->segment != NO_SEG)
 331                             errfunc(ERR_NONFATAL,
 332                                     "one-byte relocation attempted");
 333                         else {
 334                             uint8_t out_byte = e->offset;
 335                             out(offset, segment, &out_byte,
 336                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 337                         }
 338                     } else if (wsize > 8) {
 339                         errfunc(ERR_NONFATAL,
 340                                 "integer supplied to a DT, DO or DY"
 341                                 " instruction");
 342                     } else
 343                         out(offset, segment, &e->offset,
 344                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 345                     offset += wsize;
 346                 } else if (e->type == EOT_DB_STRING ||
 347                            e->type == EOT_DB_STRING_FREE) {
 348                     int align;
 349
 350                     out(offset, segment, e->stringval,
 351                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 352                     align = e->stringlen % wsize;
 353
 354                     if (align) {
 355                         align = wsize - align;
 356                         out(offset, segment, const_zero_buf,
 357                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 358                     }
 359                     offset += e->stringlen + align;
 360                 }
 361             }
 362             if (t > 0 && t == instruction->times - 1) {
 363                 /*
 364                  * Dummy call to list->output to give the offset to the
 365                  * listing module.
 366                  */
 367                 list->output(offset, NULL, OUT_RAWDATA, 0);
 368                 list->uplevel(LIST_TIMES);
 369             }
 370         }
 371         if (instruction->times > 1)
 372             list->downlevel(LIST_TIMES);
 373         return offset - start;
 374     }
 375
 376     if (instruction->opcode == I_INCBIN) {
 377         const char *fname = instruction->eops->stringval;
 378         FILE *fp;
 379
 380         fp = fopen(fname, "rb");
 381         if (!fp) {
 382             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 383                   fname);
 384         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 385             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 386                   fname);
 387         } else {
 388             static char buf[4096];
 389             size_t t = instruction->times;
 390             size_t base = 0;
 391             size_t len;
 392
 393             len = ftell(fp);
 394             if (instruction->eops->next) {
 395                 base = instruction->eops->next->offset;
 396                 len -= base;
 397                 if (instruction->eops->next->next &&
 398                     len > (size_t)instruction->eops->next->next->offset)
 399                     len = (size_t)instruction->eops->next->next->offset;
 400             }
 401             /*
 402              * Dummy call to list->output to give the offset to the
 403              * listing module.
 404              */
 405             list->output(offset, NULL, OUT_RAWDATA, 0);
 406             list->uplevel(LIST_INCBIN);
 407             while (t--) {
 408                 size_t l;
 409
 410                 fseek(fp, base, SEEK_SET);
 411                 l = len;
 412                 while (l > 0) {
 413                     int32_t m =
 414                         fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
 415                               fp);
 416                     if (!m) {
 417                         /*
 418                          * This shouldn't happen unless the file
 419                          * actually changes while we are reading
 420                          * it.
 421                          */
 422                         error(ERR_NONFATAL,
 423                               "`incbin': unexpected EOF while"
 424                               " reading file `%s'", fname);
 425                         t = 0;  /* Try to exit cleanly */
 426                         break;
 427                     }
 428                     out(offset, segment, buf, OUT_RAWDATA, m,
 429                         NO_SEG, NO_SEG);
 430                     l -= m;
 431                 }
 432             }
 433             list->downlevel(LIST_INCBIN);
 434             if (instruction->times > 1) {
 435                 /*
 436                  * Dummy call to list->output to give the offset to the
 437                  * listing module.
 438                  */
 439                 list->output(offset, NULL, OUT_RAWDATA, 0);
 440                 list->uplevel(LIST_TIMES);
 441                 list->downlevel(LIST_TIMES);
 442             }
 443             fclose(fp);
 444             return instruction->times * len;
 445         }
 446         return 0;               /* if we're here, there's an error */
 447     }
 448
 449     /* Check to see if we need an address-size prefix */
 450     add_asp(instruction, bits);
 451
 452     size_prob = 0;
 453
 454     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 455         int m = matches(temp, instruction, bits);
 456         if (m == 100 ||
 457             (m == 99 && jmp_match(segment, offset, bits,
 458                                   instruction, temp->code))) {
 459             /* Matches! */
 460             int64_t insn_size = calcsize(segment, offset, bits,
 461                                       instruction, temp->code);
 462             itimes = instruction->times;
 463             if (insn_size < 0)  /* shouldn't be, on pass two */
 464                 error(ERR_PANIC, "errors made it through from pass one");
 465             else
 466                 while (itimes--) {
 467                     for (j = 0; j < MAXPREFIX; j++) {
 468                         uint8_t c = 0;
 469                         switch (instruction->prefixes[j]) {
 470                         case P_LOCK:
 471                             c = 0xF0;
 472                             break;
 473                         case P_REPNE:
 474                         case P_REPNZ:
 475                             c = 0xF2;
 476                             break;
 477                         case P_REPE:
 478                         case P_REPZ:
 479                         case P_REP:
 480                             c = 0xF3;
 481                             break;
 482                         case R_CS:
 483                             if (bits == 64) {
 484                                 error(ERR_WARNING | ERR_PASS2,
 485                                       "cs segment base generated, but will be ignored in 64-bit mode");
 486                             }
 487                             c = 0x2E;
 488                             break;
 489                         case R_DS:
 490                             if (bits == 64) {
 491                                 error(ERR_WARNING | ERR_PASS2,
 492                                       "ds segment base generated, but will be ignored in 64-bit mode");
 493                             }
 494                             c = 0x3E;
 495                             break;
 496                         case R_ES:
 497                            if (bits == 64) {
 498                                 error(ERR_WARNING | ERR_PASS2,
 499                                       "es segment base generated, but will be ignored in 64-bit mode");
 500                            }
 501                             c = 0x26;
 502                             break;
 503                         case R_FS:
 504                             c = 0x64;
 505                             break;
 506                         case R_GS:
 507                             c = 0x65;
 508                             break;
 509                         case R_SS:
 510                             if (bits == 64) {
 511                                 error(ERR_WARNING | ERR_PASS2,
 512                                       "ss segment base generated, but will be ignored in 64-bit mode");
 513                             }
 514                             c = 0x36;
 515                             break;
 516                         case R_SEGR6:
 517                         case R_SEGR7:
 518                             error(ERR_NONFATAL,
 519                                   "segr6 and segr7 cannot be used as prefixes");
 520                             break;
 521                         case P_A16:
 522                             if (bits == 64) {
 523                                 error(ERR_NONFATAL,
 524                                       "16-bit addressing is not supported "
 525                                       "in 64-bit mode");
 526                             } else if (bits != 16)
 527                                 c = 0x67;
 528                             break;
 529                         case P_A32:
 530                             if (bits != 32)
 531                                 c = 0x67;
 532                             break;
 533                         case P_A64:
 534                             if (bits != 64) {
 535                                 error(ERR_NONFATAL,
 536                                       "64-bit addressing is only supported "
 537                                       "in 64-bit mode");
 538                             }
 539                             break;
 540                         case P_ASP:
 541                             c = 0x67;
 542                             break;
 543                         case P_O16:
 544                             if (bits != 16)
 545                                 c = 0x66;
 546                             break;
 547                         case P_O32:
 548                             if (bits == 16)
 549                                 c = 0x66;
 550                             break;
 551                         case P_O64:
 552                             /* REX.W */
 553                             break;
 554                         case P_OSP:
 555                             c = 0x66;
 556                             break;
 557                         case P_none:
 558                             break;
 559                         default:
 560                             error(ERR_PANIC, "invalid instruction prefix");
 561                         }
 562                         if (c != 0) {
 563                             out(offset, segment, &c, OUT_RAWDATA, 1,
 564                                 NO_SEG, NO_SEG);
 565                             offset++;
 566                         }
 567                     }
 568                     insn_end = offset + insn_size;
 569                     gencode(segment, offset, bits, instruction,
 570                             temp, insn_end);
 571                     offset += insn_size;
 572                     if (itimes > 0 && itimes == instruction->times - 1) {
 573                         /*
 574                          * Dummy call to list->output to give the offset to the
 575                          * listing module.
 576                          */
 577                         list->output(offset, NULL, OUT_RAWDATA, 0);
 578                         list->uplevel(LIST_TIMES);
 579                     }
 580                 }
 581             if (instruction->times > 1)
 582                 list->downlevel(LIST_TIMES);
 583             return offset - start;
 584         } else if (m > 0 && m > size_prob) {
 585             size_prob = m;
 586         }
 587     }
 588
 589     if (temp->opcode == -1) {   /* didn't match any instruction */
 590         switch (size_prob) {
 591         case 1:
 592             error(ERR_NONFATAL, "operation size not specified");
 593             break;
 594         case 2:
 595             error(ERR_NONFATAL, "mismatch in operand sizes");
 596             break;
 597         case 3:
 598             error(ERR_NONFATAL, "no instruction for this cpu level");
 599             break;
 600         case 4:
 601             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 602                   bits);
 603             break;
 604         default:
 605             error(ERR_NONFATAL,
 606                   "invalid combination of opcode and operands");
 607             break;
 608         }
 609     }
 610     return 0;
 611 }
 612
 613 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 614                insn * instruction, efunc error)
 615 {
 616     const struct itemplate *temp;
 617
 618     errfunc = error;            /* to pass to other functions */
 619     cpu = cp;
 620
 621     if (instruction->opcode == -1)
 622         return 0;
 623
 624     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 625         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 626         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 627         instruction->opcode == I_DY) {
 628         extop *e;
 629         int32_t isize, osize, wsize = 0;   /* placate gcc */
 630
 631         isize = 0;
 632         switch (instruction->opcode) {
 633         case I_DB:
 634             wsize = 1;
 635             break;
 636         case I_DW:
 637             wsize = 2;
 638             break;
 639         case I_DD:
 640             wsize = 4;
 641             break;
 642         case I_DQ:
 643             wsize = 8;
 644             break;
 645         case I_DT:
 646             wsize = 10;
 647             break;
 648         case I_DO:
 649             wsize = 16;
 650             break;
 651         case I_DY:
 652             wsize = 32;
 653             break;
 654         default:
 655             break;
 656         }
 657
 658         for (e = instruction->eops; e; e = e->next) {
 659             int32_t align;
 660
 661             osize = 0;
 662             if (e->type == EOT_DB_NUMBER)
 663                 osize = 1;
 664             else if (e->type == EOT_DB_STRING ||
 665                      e->type == EOT_DB_STRING_FREE)
 666                 osize = e->stringlen;
 667
 668             align = (-osize) % wsize;
 669             if (align < 0)
 670                 align += wsize;
 671             isize += osize + align;
 672         }
 673         return isize * instruction->times;
 674     }
 675
 676     if (instruction->opcode == I_INCBIN) {
 677         const char *fname = instruction->eops->stringval;
 678         FILE *fp;
 679         size_t len;
 680
 681         fp = fopen(fname, "rb");
 682         if (!fp)
 683             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 684                   fname);
 685         else if (fseek(fp, 0L, SEEK_END) < 0)
 686             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 687                   fname);
 688         else {
 689             len = ftell(fp);
 690             fclose(fp);
 691             if (instruction->eops->next) {
 692                 len -= instruction->eops->next->offset;
 693                 if (instruction->eops->next->next &&
 694                     len > (size_t)instruction->eops->next->next->offset) {
 695                     len = (size_t)instruction->eops->next->next->offset;
 696                 }
 697             }
 698             return instruction->times * len;
 699         }
 700         return 0;               /* if we're here, there's an error */
 701     }
 702
 703     /* Check to see if we need an address-size prefix */
 704     add_asp(instruction, bits);
 705
 706     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 707         int m = matches(temp, instruction, bits);
 708         if (m == 100 ||
 709             (m == 99 && jmp_match(segment, offset, bits,
 710                                   instruction, temp->code))) {
 711             /* we've matched an instruction. */
 712             int64_t isize;
 713             const uint8_t *codes = temp->code;
 714             int j;
 715
 716             isize = calcsize(segment, offset, bits, instruction, codes);
 717             if (isize < 0)
 718                 return -1;
 719             for (j = 0; j < MAXPREFIX; j++) {
 720                 switch (instruction->prefixes[j]) {
 721                 case P_A16:
 722                     if (bits != 16)
 723                         isize++;
 724                     break;
 725                 case P_A32:
 726                     if (bits != 32)
 727                         isize++;
 728                     break;
 729                 case P_O16:
 730                     if (bits != 16)
 731                         isize++;
 732                     break;
 733                 case P_O32:
 734                     if (bits == 16)
 735                         isize++;
 736                     break;
 737                 case P_A64:
 738                 case P_O64:
 739                 case P_none:
 740                     break;
 741                 default:
 742                     isize++;
 743                     break;
 744                 }
 745             }
 746             return isize * instruction->times;
 747         }
 748     }
 749     return -1;                  /* didn't match any instruction */
 750 }
 751
 752 static bool possible_sbyte(operand *o)
 753 {
 754     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 755         !(o->opflags & OPFLAG_FORWARD) &&
 756         optimizing >= 0 && !(o->type & STRICT);
 757 }
 758
 759 /* check that opn[op]  is a signed byte of size 16 or 32 */
 760 static bool is_sbyte16(operand *o)
 761 {
 762     int16_t v;
 763
 764     if (!possible_sbyte(o))
 765         return false;
 766
 767     v = o->offset;
 768     return v >= -128 && v <= 127;
 769 }
 770
 771 static bool is_sbyte32(operand *o)
 772 {
 773     int32_t v;
 774
 775     if (!possible_sbyte(o))
 776         return false;
 777
 778     v = o->offset;
 779     return v >= -128 && v <= 127;
 780 }
 781
 782 /* Common construct: expands to four consecutive case labels, (x) through (x)+3; the first `case` keyword and the final colon are part of the expansion's use site, e.g. `case4(010):` */
 783 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 784
 785 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 786                         insn * ins, const uint8_t *codes)
 787 {
 788     int64_t length = 0;
 789     uint8_t c;
 790     int rex_mask = ~0;
 791     int op1, op2;
 792     struct operand *opx;
 793     uint8_t opex = 0;
 794
 795     ins->rex = 0;               /* Ensure REX is reset */
 796
 797     if (ins->prefixes[PPS_OSIZE] == P_O64)
 798         ins->rex |= REX_W;
 799
 800     (void)segment;              /* Don't warn that this parameter is unused */
 801     (void)offset;               /* Don't warn that this parameter is unused */
 802
 803     while (*codes) {
 804         c = *codes++;
 805         op1 = (c & 3) + ((opex & 1) << 2);
 806         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 807         opx = &ins->oprs[op1];
 808         opex = 0;               /* For the next iteration */
 809
 810         switch (c) {
 811         case 01:
 812         case 02:
 813         case 03:
 814         case 04:
 815             codes += c, length += c;
 816             break;
 817
 818         case 05:
 819         case 06:
 820         case 07:
 821             opex = c;
 822             break;
 823
 824         case4(010):
 825             ins->rex |=
 826                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 827             codes++, length++;
 828             break;
 829
 830         case4(014):
 831         case4(020):
 832         case4(024):
 833             length++;
 834             break;
 835
 836         case4(030):
 837             length += 2;
 838             break;
 839
 840         case4(034):
 841             if (opx->type & (BITS16 | BITS32 | BITS64))
 842                 length += (opx->type & BITS16) ? 2 : 4;
 843             else
 844                 length += (bits == 16) ? 2 : 4;
 845             break;
 846
 847         case4(040):
 848             length += 4;
 849             break;
 850
 851         case4(044):
 852             length += ins->addr_size >> 3;
 853             break;
 854
 855         case4(050):
 856             length++;
 857             break;
 858
 859         case4(054):
 860             length += 8; /* MOV reg64/imm */
 861             break;
 862
 863         case4(060):
 864             length += 2;
 865             break;
 866
 867         case4(064):
 868             if (opx->type & (BITS16 | BITS32 | BITS64))
 869                 length += (opx->type & BITS16) ? 2 : 4;
 870             else
 871                 length += (bits == 16) ? 2 : 4;
 872             break;
 873
 874         case4(070):
 875             length += 4;
 876             break;
 877
 878         case4(074):
 879             length += 2;
 880             break;
 881
 882         case4(0140):
 883             length += is_sbyte16(opx) ? 1 : 2;
 884             break;
 885
 886         case4(0144):
 887             codes++;
 888             length++;
 889             break;
 890
 891         case4(0150):
 892             length += is_sbyte32(opx) ? 1 : 4;
 893             break;
 894
 895         case4(0154):
 896             codes++;
 897             length++;
 898             break;
 899
 900         case4(0160):
 901             length++;
 902             ins->rex |= REX_D;
 903             ins->drexdst = regval(opx);
 904             break;
 905
 906         case4(0164):
 907             length++;
 908             ins->rex |= REX_D|REX_OC;
 909             ins->drexdst = regval(opx);
 910             break;
 911
 912         case 0171:
 913             break;
 914
 915         case 0172:
 916         case 0173:
 917         case 0174:
 918             codes++;
 919             length++;
 920             break;
 921
 922         case4(0250):
 923             length += is_sbyte32(opx) ? 1 : 4;
 924             break;
 925
 926         case4(0254):
 927             length += 4;
 928             break;
 929
 930         case4(0260):
 931             ins->rex |= REX_V;
 932             ins->drexdst = regval(opx);
 933             ins->vex_m = *codes++;
 934             ins->vex_wlp = *codes++;
 935             break;
 936
 937         case 0270:
 938             ins->rex |= REX_V;
 939             ins->drexdst = 0;
 940             ins->vex_m = *codes++;
 941             ins->vex_wlp = *codes++;
 942             break;
 943
 944         case4(0274):
 945             length++;
 946             break;
 947
 948         case4(0300):
 949             break;
 950
 951         case 0310:
 952             if (bits == 64)
 953                 return -1;
 954             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 955             break;
 956
 957         case 0311:
 958             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 959             break;
 960
 961         case 0312:
 962             break;
 963
 964         case 0313:
 965             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 966                 has_prefix(ins, PPS_ASIZE, P_A32))
 967                 return -1;
 968             break;
 969
 970         case4(0314):
 971             break;
 972
 973         case 0320:
 974             length += (bits != 16);
 975             break;
 976
 977         case 0321:
 978             length += (bits == 16);
 979             break;
 980
 981         case 0322:
 982             break;
 983
 984         case 0323:
 985             rex_mask &= ~REX_W;
 986             break;
 987
 988         case 0324:
 989             ins->rex |= REX_W;
 990             break;
 991
 992         case 0330:
 993             codes++, length++;
 994             break;
 995
 996         case 0331:
 997             break;
 998
 999         case 0332:
1000         case 0333:
1001             length++;
1002             break;
1003
1004         case 0334:
1005             ins->rex |= REX_L;
1006             break;
1007
1008         case 0335:
1009             break;
1010
1011         case 0336:
1012             if (!ins->prefixes[PPS_LREP])
1013                 ins->prefixes[PPS_LREP] = P_REP;
1014             break;
1015
1016         case 0337:
1017             if (!ins->prefixes[PPS_LREP])
1018                 ins->prefixes[PPS_LREP] = P_REPNE;
1019             break;
1020
1021         case 0340:
1022             if (ins->oprs[0].segment != NO_SEG)
1023                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1024                         " quantity of BSS space");
1025             else
1026                 length += ins->oprs[0].offset;
1027             break;
1028
1029         case4(0344):
1030             length++;
1031             break;
1032
1033         case 0360:
1034             break;
1035
1036         case 0361:
1037         case 0362:
1038         case 0363:
1039             length++;
1040             break;
1041
1042         case 0364:
1043         case 0365:
1044             break;
1045
1046         case 0366:
1047         case 0367:
1048             length++;
1049             break;
1050
1051         case 0370:
1052         case 0371:
1053         case 0372:
1054             break;
1055
1056         case 0373:
1057             length++;
1058             break;
1059
1060         case4(0100):
1061         case4(0110):
1062         case4(0120):
1063         case4(0130):
1064         case4(0200):
1065         case4(0204):
1066         case4(0210):
1067         case4(0214):
1068         case4(0220):
1069         case4(0224):
1070         case4(0230):
1071         case4(0234):
1072             {
1073                 ea ea_data;
1074                 int rfield;
1075                 int32_t rflags;
1076                 struct operand *opy = &ins->oprs[op2];
1077
1078                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1079
1080                 if (c <= 0177) {
1081                     /* pick rfield from operand b (opx) */
1082                     rflags = regflag(opx);
1083                     rfield = nasm_regvals[opx->basereg];
1084                 } else {
1085                     rflags = 0;
1086                     rfield = c & 7;
1087                 }
1088                 if (!process_ea(opy, &ea_data, bits,
1089                                 ins->addr_size, rfield, rflags)) {
1090                     errfunc(ERR_NONFATAL, "invalid effective address");
1091                     return -1;
1092                 } else {
1093                     ins->rex |= ea_data.rex;
1094                     length += ea_data.size;
1095                 }
1096             }
1097             break;
1098
1099         default:
1100             errfunc(ERR_PANIC, "internal instruction table corrupt"
1101                     ": instruction code 0x%02X given", c);
1102             break;
1103         }
1104     }
1105
1106     ins->rex &= rex_mask;
1107
1108     if (ins->rex & REX_V) {
1109         int bad32 = REX_R|REX_W|REX_X|REX_B;
1110
1111         if (ins->rex & REX_H) {
1112             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1113             return -1;
1114         }
1115         switch (ins->vex_wlp & 030) {
1116         case 000:
1117         case 020:
1118             ins->rex &= ~REX_W;
1119             break;
1120         case 010:
1121             ins->rex |= REX_W;
1122             bad32 &= ~REX_W;
1123             break;
1124         case 030:
1125             /* Follow REX_W */
1126             break;
1127         }
1128
1129         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1130             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1131             return -1;
1132         }
1133         if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1134             length += 3;
1135         else
1136             length += 2;
1137     } else if (ins->rex & REX_D) {
1138         if (ins->rex & REX_H) {
1139             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1140             return -1;
1141         }
1142         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1143                            ins->drexdst > 7)) {
1144             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1145             return -1;
1146         }
1147         length++;
1148     } else if (ins->rex & REX_REAL) {
1149         if (ins->rex & REX_H) {
1150             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1151             return -1;
1152         } else if (bits == 64) {
1153             length++;
1154         } else if ((ins->rex & REX_L) &&
1155                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1156                    cpu >= IF_X86_64) {
1157             /* LOCK-as-REX.R */
1158             assert_no_prefix(ins, PPS_LREP);
1159             length++;
1160         } else {
1161             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1162             return -1;
1163         }
1164     }
1165
1166     return length;
1167 }
1168
/*
 * Emit the pending REX prefix byte, if one is needed.
 *
 * Uses `ins`, `bits`, `offset` and `segment` from the invoking scope.
 * A byte is written only in 64-bit mode, when ins->rex has at least one
 * REX_REAL bit set and neither REX_D nor REX_V is set.  After writing,
 * ins->rex is cleared and `offset` is advanced by one.
 *
 * Wrapped in do { } while (0) so that `EMIT_REX();` is a single statement
 * and stays safe inside an unbraced if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) &&                              \
            (ins->rex & REX_REAL) && (bits == 64)) {                    \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1,             \
                NO_SEG, NO_SEG);                                        \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1176
1177 static void gencode(int32_t segment, int64_t offset, int bits,
1178                     insn * ins, const struct itemplate *temp,
1179                     int64_t insn_end)
1180 {
1181     static char condval[] = {   /* conditional opcodes */
1182         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1183         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1184         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1185     };
1186     uint8_t c;
1187     uint8_t bytes[4];
1188     int64_t size;
1189     int64_t data;
1190     int op1, op2;
1191     struct operand *opx;
1192     const uint8_t *codes = temp->code;
1193     uint8_t opex = 0;
1194
1195     while (*codes) {
1196         c = *codes++;
1197         op1 = (c & 3) + ((opex & 1) << 2);
1198         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1199         opx = &ins->oprs[op1];
1200         opex = 0;               /* For the next iteration */
1201
1202         switch (c) {
1203         case 01:
1204         case 02:
1205         case 03:
1206         case 04:
1207             EMIT_REX();
1208             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1209             codes += c;
1210             offset += c;
1211             break;
1212
1213         case 05:
1214         case 06:
1215         case 07:
1216             opex = c;
1217             break;
1218
1219         case4(010):
1220             EMIT_REX();
1221             bytes[0] = *codes++ + (regval(opx) & 7);
1222             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1223             offset += 1;
1224             break;
1225
1226         case4(014):
1227             /* The test for BITS8 and SBYTE here is intended to avoid
1228                warning on optimizer actions due to SBYTE, while still
1229                warn on explicit BYTE directives.  Also warn, obviously,
1230                if the optimizer isn't enabled. */
1231             if (((opx->type & BITS8) ||
1232                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1233                 (opx->offset < -128 || opx->offset > 127)) {
1234                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1235                         "signed byte value exceeds bounds");
1236             }
1237             if (opx->segment != NO_SEG) {
1238                 data = opx->offset;
1239                 out(offset, segment, &data, OUT_ADDRESS, 1,
1240                     opx->segment, opx->wrt);
1241             } else {
1242                 bytes[0] = opx->offset;
1243                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1244                     NO_SEG);
1245             }
1246             offset += 1;
1247             break;
1248
1249         case4(020):
1250             if (opx->offset < -256 || opx->offset > 255) {
1251                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1252                         "byte value exceeds bounds");
1253             }
1254             if (opx->segment != NO_SEG) {
1255                 data = opx->offset;
1256                 out(offset, segment, &data, OUT_ADDRESS, 1,
1257                     opx->segment, opx->wrt);
1258             } else {
1259                 bytes[0] = opx->offset;
1260                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1261                     NO_SEG);
1262             }
1263             offset += 1;
1264             break;
1265
1266         case4(024):
1267             if (opx->offset < 0 || opx->offset > 255)
1268                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1269                         "unsigned byte value exceeds bounds");
1270             if (opx->segment != NO_SEG) {
1271                 data = opx->offset;
1272                 out(offset, segment, &data, OUT_ADDRESS, 1,
1273                     opx->segment, opx->wrt);
1274             } else {
1275                 bytes[0] = opx->offset;
1276                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1277                     NO_SEG);
1278             }
1279             offset += 1;
1280             break;
1281
1282         case4(030):
1283             warn_overflow(2, opx);
1284             data = opx->offset;
1285             out(offset, segment, &data, OUT_ADDRESS, 2,
1286                 opx->segment, opx->wrt);
1287             offset += 2;
1288             break;
1289
1290         case4(034):
1291             if (opx->type & (BITS16 | BITS32))
1292                 size = (opx->type & BITS16) ? 2 : 4;
1293             else
1294                 size = (bits == 16) ? 2 : 4;
1295             warn_overflow(size, opx);
1296             data = opx->offset;
1297             out(offset, segment, &data, OUT_ADDRESS, size,
1298                 opx->segment, opx->wrt);
1299             offset += size;
1300             break;
1301
1302         case4(040):
1303             warn_overflow(4, opx);
1304             data = opx->offset;
1305             out(offset, segment, &data, OUT_ADDRESS, 4,
1306                 opx->segment, opx->wrt);
1307             offset += 4;
1308             break;
1309
1310         case4(044):
1311             data = opx->offset;
1312             size = ins->addr_size >> 3;
1313             warn_overflow(size, opx);
1314             out(offset, segment, &data, OUT_ADDRESS, size,
1315                 opx->segment, opx->wrt);
1316             offset += size;
1317             break;
1318
1319         case4(050):
1320             if (opx->segment != segment)
1321                 errfunc(ERR_NONFATAL,
1322                         "short relative jump outside segment");
1323             data = opx->offset - insn_end;
1324             if (data > 127 || data < -128)
1325                 errfunc(ERR_NONFATAL, "short jump is out of range");
1326             bytes[0] = data;
1327             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1328             offset += 1;
1329             break;
1330
1331         case4(054):
1332             data = (int64_t)opx->offset;
1333             out(offset, segment, &data, OUT_ADDRESS, 8,
1334                 opx->segment, opx->wrt);
1335             offset += 8;
1336             break;
1337
1338         case4(060):
1339             if (opx->segment != segment) {
1340                 data = opx->offset;
1341                 out(offset, segment, &data,
1342                     OUT_REL2ADR, insn_end - offset,
1343                     opx->segment, opx->wrt);
1344             } else {
1345                 data = opx->offset - insn_end;
1346                 out(offset, segment, &data,
1347                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1348             }
1349             offset += 2;
1350             break;
1351
1352         case4(064):
1353             if (opx->type & (BITS16 | BITS32 | BITS64))
1354                 size = (opx->type & BITS16) ? 2 : 4;
1355             else
1356                 size = (bits == 16) ? 2 : 4;
1357             if (opx->segment != segment) {
1358                 data = opx->offset;
1359                 out(offset, segment, &data,
1360                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1361                     insn_end - offset, opx->segment, opx->wrt);
1362             } else {
1363                 data = opx->offset - insn_end;
1364                 out(offset, segment, &data,
1365                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1366             }
1367             offset += size;
1368             break;
1369
1370         case4(070):
1371             if (opx->segment != segment) {
1372                 data = opx->offset;
1373                 out(offset, segment, &data,
1374                     OUT_REL4ADR, insn_end - offset,
1375                     opx->segment, opx->wrt);
1376             } else {
1377                 data = opx->offset - insn_end;
1378                 out(offset, segment, &data,
1379                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1380             }
1381             offset += 4;
1382             break;
1383
1384         case4(074):
1385             if (opx->segment == NO_SEG)
1386                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1387                         " relocatable");
1388             data = 0;
1389             out(offset, segment, &data, OUT_ADDRESS, 2,
1390                 outfmt->segbase(1 + opx->segment),
1391                 opx->wrt);
1392             offset += 2;
1393             break;
1394
1395         case4(0140):
1396             data = opx->offset;
1397             warn_overflow(2, opx);
1398             if (is_sbyte16(opx)) {
1399                 bytes[0] = data;
1400                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1401                     NO_SEG);
1402                 offset++;
1403             } else {
1404                 out(offset, segment, &data, OUT_ADDRESS, 2,
1405                     opx->segment, opx->wrt);
1406                 offset += 2;
1407             }
1408             break;
1409
1410         case4(0144):
1411             EMIT_REX();
1412             bytes[0] = *codes++;
1413             if (is_sbyte16(opx))
1414                 bytes[0] |= 2;  /* s-bit */
1415             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1416             offset++;
1417             break;
1418
1419         case4(0150):
1420             data = opx->offset;
1421             warn_overflow(4, opx);
1422             if (is_sbyte32(opx)) {
1423                 bytes[0] = data;
1424                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1425                     NO_SEG);
1426                 offset++;
1427             } else {
1428                 out(offset, segment, &data, OUT_ADDRESS, 4,
1429                     opx->segment, opx->wrt);
1430                 offset += 4;
1431             }
1432             break;
1433
1434         case4(0154):
1435             EMIT_REX();
1436             bytes[0] = *codes++;
1437             if (is_sbyte32(opx))
1438                 bytes[0] |= 2;  /* s-bit */
1439             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1440             offset++;
1441             break;
1442
1443         case4(0160):
1444         case4(0164):
1445             break;
1446
1447         case 0171:
1448             bytes[0] =
1449                 (ins->drexdst << 4) |
1450                 (ins->rex & REX_OC ? 0x08 : 0) |
1451                 (ins->rex & (REX_R|REX_X|REX_B));
1452             ins->rex = 0;
1453             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1454             offset++;
1455             break;
1456
1457         case 0172:
1458             c = *codes++;
1459             opx = &ins->oprs[c >> 3];
1460             bytes[0] = nasm_regvals[opx->basereg] << 4;
1461             opx = &ins->oprs[c & 7];
1462             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1463                 errfunc(ERR_NONFATAL,
1464                         "non-absolute expression not permitted as argument %d",
1465                         c & 7);
1466             } else {
1467                 if (opx->offset & ~15) {
1468                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1469                             "four-bit argument exceeds bounds");
1470                 }
1471                 bytes[0] |= opx->offset & 15;
1472             }
1473             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1474             offset++;
1475             break;
1476
1477         case 0173:
1478             c = *codes++;
1479             opx = &ins->oprs[c >> 4];
1480             bytes[0] = nasm_regvals[opx->basereg] << 4;
1481             bytes[0] |= c & 15;
1482             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1483             offset++;
1484             break;
1485
1486         case 0174:
1487             c = *codes++;
1488             opx = &ins->oprs[c];
1489             bytes[0] = nasm_regvals[opx->basereg] << 4;
1490             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1491             offset++;
1492             break;
1493
1494         case4(0250):
1495             data = opx->offset;
1496             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1497                 (int32_t)data != (int64_t)data) {
1498                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1499                         "signed dword immediate exceeds bounds");
1500             }
1501             if (is_sbyte32(opx)) {
1502                 bytes[0] = data;
1503                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1504                     NO_SEG);
1505                 offset++;
1506             } else {
1507                 out(offset, segment, &data, OUT_ADDRESS, 4,
1508                     opx->segment, opx->wrt);
1509                 offset += 4;
1510             }
1511             break;
1512
1513         case4(0254):
1514             data = opx->offset;
1515             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1516                 (int32_t)data != (int64_t)data) {
1517                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1518                         "signed dword immediate exceeds bounds");
1519             }
1520             out(offset, segment, &data, OUT_ADDRESS, 4,
1521                 opx->segment, opx->wrt);
1522             offset += 4;
1523             break;
1524
1525         case4(0260):
1526         case 0270:
1527             codes += 2;
1528             if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1529                 bytes[0] = 0xc4;
1530                 bytes[1] = ins->vex_m | ((~ins->rex & 7) << 5);
1531                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1532                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1533                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1534                 offset += 3;
1535             } else {
1536                 bytes[0] = 0xc5;
1537                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1538                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1539                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1540                 offset += 2;
1541             }
1542             break;
1543
1544         case4(0274):
1545         {
1546             uint64_t uv, um;
1547             int s;
1548
1549             if (ins->rex & REX_W)
1550                 s = 64;
1551             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1552                 s = 16;
1553             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1554                 s = 32;
1555             else
1556                 s = bits;
1557
1558             um = (uint64_t)2 << (s-1);
1559             uv = opx->offset;
1560
1561             if (uv > 127 && uv < (uint64_t)-128 &&
1562                 (uv < um-128 || uv > um-1)) {
1563                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1564                         "signed byte value exceeds bounds");
1565             }
1566             if (opx->segment != NO_SEG) {
1567                 data = uv;
1568                 out(offset, segment, &data, OUT_ADDRESS, 1,
1569                     opx->segment, opx->wrt);
1570             } else {
1571                 bytes[0] = uv;
1572                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1573                     NO_SEG);
1574             }
1575             offset += 1;
1576             break;
1577         }
1578
1579         case4(0300):
1580             break;
1581
1582         case 0310:
1583             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1584                 *bytes = 0x67;
1585                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1586                 offset += 1;
1587             } else
1588                 offset += 0;
1589             break;
1590
1591         case 0311:
1592             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1593                 *bytes = 0x67;
1594                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1595                 offset += 1;
1596             } else
1597                 offset += 0;
1598             break;
1599
1600         case 0312:
1601             break;
1602
1603         case 0313:
1604             ins->rex = 0;
1605             break;
1606
1607         case4(0314):
1608             break;
1609
1610         case 0320:
1611             if (bits != 16) {
1612                 *bytes = 0x66;
1613                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1614                 offset += 1;
1615             } else
1616                 offset += 0;
1617             break;
1618
1619         case 0321:
1620             if (bits == 16) {
1621                 *bytes = 0x66;
1622                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1623                 offset += 1;
1624             } else
1625                 offset += 0;
1626             break;
1627
1628         case 0322:
1629         case 0323:
1630             break;
1631
1632         case 0324:
1633             ins->rex |= REX_W;
1634             break;
1635
1636         case 0330:
1637             *bytes = *codes++ ^ condval[ins->condition];
1638             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1639             offset += 1;
1640             break;
1641
1642         case 0331:
1643             break;
1644
1645         case 0332:
1646         case 0333:
1647             *bytes = c - 0332 + 0xF2;
1648             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1649             offset += 1;
1650             break;
1651
1652         case 0334:
1653             if (ins->rex & REX_R) {
1654                 *bytes = 0xF0;
1655                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1656                 offset += 1;
1657             }
1658             ins->rex &= ~(REX_L|REX_R);
1659             break;
1660
1661         case 0335:
1662             break;
1663
1664         case 0336:
1665         case 0337:
1666             break;
1667
1668         case 0340:
1669             if (ins->oprs[0].segment != NO_SEG)
1670                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1671             else {
1672                 int64_t size = ins->oprs[0].offset;
1673                 if (size > 0)
1674                     out(offset, segment, NULL,
1675                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1676                 offset += size;
1677             }
1678             break;
1679
1680         case 0344:
1681         case 0345:
1682             bytes[0] = c & 1;
1683             switch (ins->oprs[0].basereg) {
1684             case R_CS:
1685                 bytes[0] += 0x0E;
1686                 break;
1687             case R_DS:
1688                 bytes[0] += 0x1E;
1689                 break;
1690             case R_ES:
1691                 bytes[0] += 0x06;
1692                 break;
1693             case R_SS:
1694                 bytes[0] += 0x16;
1695                 break;
1696             default:
1697                 errfunc(ERR_PANIC,
1698                         "bizarre 8086 segment register received");
1699             }
1700             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1701             offset++;
1702             break;
1703
1704         case 0346:
1705         case 0347:
1706             bytes[0] = c & 1;
1707             switch (ins->oprs[0].basereg) {
1708             case R_FS:
1709                 bytes[0] += 0xA0;
1710                 break;
1711             case R_GS:
1712                 bytes[0] += 0xA8;
1713                 break;
1714             default:
1715                 errfunc(ERR_PANIC,
1716                         "bizarre 386 segment register received");
1717             }
1718             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1719             offset++;
1720             break;
1721
1722         case 0360:
1723             break;
1724
1725         case 0361:
1726             bytes[0] = 0x66;
1727             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1728             offset += 1;
1729             break;
1730
1731         case 0362:
1732         case 0363:
1733             bytes[0] = c - 0362 + 0xf2;
1734             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1735             offset += 1;
1736             break;
1737
1738         case 0364:
1739         case 0365:
1740             break;
1741
1742         case 0366:
1743         case 0367:
1744             *bytes = c - 0366 + 0x66;
1745             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1746             offset += 1;
1747             break;
1748
1749         case 0370:
1750         case 0371:
1751         case 0372:
1752             break;
1753
1754         case 0373:
1755             *bytes = bits == 16 ? 3 : 5;
1756             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1757             offset += 1;
1758             break;
1759
1760         case4(0100):
1761         case4(0110):
1762         case4(0120):
1763         case4(0130):
1764         case4(0200):
1765         case4(0204):
1766         case4(0210):
1767         case4(0214):
1768         case4(0220):
1769         case4(0224):
1770         case4(0230):
1771         case4(0234):
1772             {
1773                 ea ea_data;
1774                 int rfield;
1775                 int32_t rflags;
1776                 uint8_t *p;
1777                 int32_t s;
1778                 enum out_type type;
1779                 struct operand *opy = &ins->oprs[op2];
1780
1781                 if (c <= 0177) {
1782                     /* pick rfield from operand b (opx) */
1783                     rflags = regflag(opx);
1784                     rfield = nasm_regvals[opx->basereg];
1785                 } else {
1786                     /* rfield is constant */
1787                     rflags = 0;
1788                     rfield = c & 7;
1789                 }
1790
1791                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1792                                 rfield, rflags)) {
1793                     errfunc(ERR_NONFATAL, "invalid effective address");
1794                 }
1795
1796
1797                 p = bytes;
1798                 *p++ = ea_data.modrm;
1799                 if (ea_data.sib_present)
1800                     *p++ = ea_data.sib;
1801
1802                 /* DREX suffixes come between the SIB and the displacement */
1803                 if (ins->rex & REX_D) {
1804                     *p++ = (ins->drexdst << 4) |
1805                            (ins->rex & REX_OC ? 0x08 : 0) |
1806                            (ins->rex & (REX_R|REX_X|REX_B));
1807                     ins->rex = 0;
1808                 }
1809
1810                 s = p - bytes;
1811                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1812
1813                 /*
1814                  * Make sure the address gets the right offset in case
1815                  * the line breaks in the .lst file (BR 1197827)
1816                  */
1817                 offset += s;
1818                 s = 0;
1819
1820                 switch (ea_data.bytes) {
1821                 case 0:
1822                     break;
1823                 case 1:
1824                 case 2:
1825                 case 4:
1826                 case 8:
1827                     data = opy->offset;
1828                     warn_overflow(ea_data.bytes, opy);
1829                     s += ea_data.bytes;
1830                     if (ea_data.rip) {
1831                         if (opy->segment == segment) {
1832                             data -= insn_end;
1833                             out(offset, segment, &data, OUT_ADDRESS,
1834                                 ea_data.bytes, NO_SEG, NO_SEG);
1835                         } else {
1836                             out(offset, segment, &data, OUT_REL4ADR,
1837                                 insn_end - offset, opy->segment, opy->wrt);
1838                         }
1839                     } else {
1840                         type = OUT_ADDRESS;
1841                         out(offset, segment, &data, OUT_ADDRESS,
1842                             ea_data.bytes, opy->segment, opy->wrt);
1843                     }
1844                     break;
1845                 default:
1846                     /* Impossible! */
1847                     errfunc(ERR_PANIC,
1848                             "Invalid amount of bytes (%d) for offset?!",
1849                             ea_data.bytes);
1850                     break;
1851                 }
1852                 offset += s;
1853             }
1854             break;
1855
1856         default:
1857             errfunc(ERR_PANIC, "internal instruction table corrupt"
1858                     ": instruction code 0x%02X given", c);
1859             break;
1860         }
1861     }
1862 }
1863
1864 static int32_t regflag(const operand * o)
1865 {
1866     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1867         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1868     }
1869     return nasm_reg_flags[o->basereg];
1870 }
1871
1872 static int32_t regval(const operand * o)
1873 {
1874     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1875         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1876     }
1877     return nasm_regvals[o->basereg];
1878 }
1879
1880 static int op_rexflags(const operand * o, int mask)
1881 {
1882     int32_t flags;
1883     int val;
1884
1885     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1886         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1887     }
1888
1889     flags = nasm_reg_flags[o->basereg];
1890     val = nasm_regvals[o->basereg];
1891
1892     return rexflags(val, flags, mask);
1893 }
1894
1895 static int rexflags(int val, int32_t flags, int mask)
1896 {
1897     int rex = 0;
1898
1899     if (val >= 8)
1900         rex |= REX_B|REX_X|REX_R;
1901     if (flags & BITS64)
1902         rex |= REX_W;
1903     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1904         rex |= REX_H;
1905     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1906         rex |= REX_P;
1907
1908     return rex & mask;
1909 }
1910
1911 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1912 {
1913     int i, size[MAX_OPERANDS], asize, oprs, ret;
1914
1915     ret = 100;
1916
1917     /*
1918      * Check the opcode
1919      */
1920     if (itemp->opcode != instruction->opcode)
1921         return 0;
1922
1923     /*
1924      * Count the operands
1925      */
1926     if (itemp->operands != instruction->operands)
1927         return 0;
1928
1929     /*
1930      * Check that no spurious colons or TOs are present
1931      */
1932     for (i = 0; i < itemp->operands; i++)
1933         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1934             return 0;
1935
1936     /*
1937      * Process size flags
1938      */
1939     if (itemp->flags & IF_ARMASK) {
1940         memset(size, 0, sizeof size);
1941
1942         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
1943
1944         switch (itemp->flags & IF_SMASK) {
1945         case IF_SB:
1946             size[i] = BITS8;
1947             break;
1948         case IF_SW:
1949             size[i] = BITS16;
1950             break;
1951         case IF_SD:
1952             size[i] = BITS32;
1953             break;
1954         case IF_SQ:
1955             size[i] = BITS64;
1956             break;
1957         case IF_SO:
1958             size[i] = BITS128;
1959             break;
1960         case IF_SY:
1961             size[i] = BITS256;
1962             break;
1963         case IF_SZ:
1964             switch (bits) {
1965             case 16:
1966                 size[i] = BITS16;
1967                 break;
1968             case 32:
1969                 size[i] = BITS32;
1970                 break;
1971             case 64:
1972                 size[i] = BITS64;
1973                 break;
1974             }
1975             break;
1976         default:
1977             break;
1978         }
1979     } else {
1980         asize = 0;
1981         switch (itemp->flags & IF_SMASK) {
1982         case IF_SB:
1983             asize = BITS8;
1984             break;
1985         case IF_SW:
1986             asize = BITS16;
1987             break;
1988         case IF_SD:
1989             asize = BITS32;
1990             break;
1991         case IF_SQ:
1992             asize = BITS64;
1993             break;
1994         case IF_SO:
1995             asize = BITS128;
1996             break;
1997         case IF_SY:
1998             asize = BITS256;
1999             break;
2000         case IF_SZ:
2001             switch (bits) {
2002             case 16:
2003                 asize = BITS16;
2004                 break;
2005             case 32:
2006                 asize = BITS32;
2007                 break;
2008             case 64:
2009                 asize = BITS64;
2010                 break;
2011             }
2012             break;
2013         default:
2014             break;
2015         }
2016         for (i = 0; i < MAX_OPERANDS; i++)
2017             size[i] = asize;
2018     }
2019
2020     /*
2021      * Check that the operand flags all match up
2022      */
2023     for (i = 0; i < itemp->operands; i++) {
2024         int32_t type = instruction->oprs[i].type;
2025         if (!(type & SIZE_MASK))
2026             type |= size[i];
2027
2028         if (itemp->opd[i] & SAME_AS) {
2029             int j = itemp->opd[i] & ~SAME_AS;
2030             if (type != instruction->oprs[j].type ||
2031                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2032                 return 0;
2033         } else if (itemp->opd[i] & ~type ||
2034             ((itemp->opd[i] & SIZE_MASK) &&
2035              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2036             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2037                 (type & SIZE_MASK))
2038                 return 0;
2039             else
2040                 return 1;
2041         }
2042     }
2043
2044     /*
2045      * Check operand sizes
2046      */
2047     if (itemp->flags & (IF_SM | IF_SM2)) {
2048         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2049         asize = 0;
2050         for (i = 0; i < oprs; i++) {
2051             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2052                 int j;
2053                 for (j = 0; j < oprs; j++)
2054                     size[j] = asize;
2055                 break;
2056             }
2057         }
2058     } else {
2059         oprs = itemp->operands;
2060     }
2061
2062     for (i = 0; i < itemp->operands; i++) {
2063         if (!(itemp->opd[i] & SIZE_MASK) &&
2064             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2065             return 2;
2066     }
2067
2068     /*
2069      * Check template is okay at the set cpu level
2070      */
2071     if (((itemp->flags & IF_PLEVEL) > cpu))
2072         return 3;
2073
2074     /*
2075      * Verify the appropriate long mode flag.
2076      */
2077     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2078         return 4;
2079
2080     /*
2081      * Check if special handling needed for Jumps
2082      */
2083     if ((uint8_t)(itemp->code[0]) >= 0370)
2084         return 99;
2085
2086     return ret;
2087 }
2088
/*
 * Build the ModRM/SIB description of operand `input' into `output'.
 *
 * input    - operand to encode (register or memory reference)
 * output   - receives modrm, sib / sib_present, bytes (displacement
 *            size), rip and size; REX flag bits are OR-ed into
 *            output->rex, which this function never clears
 * bits     - assembly mode; only compared against 64 here
 * addrbits - address size in effect (16, 32 or 64)
 * rfield   - value placed in bits 3..5 of the ModRM byte
 * rflags   - register flags belonging to rfield, used for its REX bits
 *
 * Returns `output' on success, or NULL when the operand cannot be
 * encoded as an effective address.
 */
2089 static ea *process_ea(operand * input, ea * output, int bits,
2090                       int addrbits, int rfield, int32_t rflags)
2091 {
     /* forward-referenced operands are never given the no-displacement
      * form or the automatically chosen byte-displacement form below */
2092     bool forw_ref = !!(input->opflags & OPFLAG_FORWARD);
2093
2094     output->rip = false;
2095
2096     /* REX flags for the rfield operand */
2097     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2098
2099     if (!(REGISTER & ~input->type)) {   /* register direct */
2100         int i;
2101         int32_t f;
2102
2103         if (input->basereg < EXPR_REG_START /* Verify as Register */
2104             || input->basereg >= REG_ENUM_LIMIT)
2105             return NULL;
2106         f = regflag(input);
2107         i = nasm_regvals[input->basereg];
2108
2109         if (REG_EA & ~f)
2110             return NULL;        /* Invalid EA register */
2111
2112         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2113
2114         output->sib_present = false;             /* no SIB necessary */
2115         output->bytes = 0;  /* no offset necessary either */
2116         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2117     } else {                    /* it's a memory reference */
2118         if (input->basereg == -1
2119             && (input->indexreg == -1 || input->scale == 0)) {
2120             /* it's a pure offset */
2121             if (bits == 64 && (~input->type & IP_REL)) {
               /* 64-bit mode and the operand type lacks IP_REL: use
                * r/m = 4 with a SIB byte of index 4, base 5 and a
                * 4-byte displacement; not flagged RIP-relative */
2122               int scale, index, base;
2123               output->sib_present = true;
2124               scale = 0;
2125               index = 4;
2126               base = 5;
2127               output->sib = (scale << 6) | (index << 3) | base;
2128               output->bytes = 4;
2129               output->modrm = 4 | ((rfield & 7) << 3);
2130               output->rip = false;
2131             } else {
               /* plain displacement: 2 bytes with r/m = 6 for 16-bit
                * addressing, otherwise 4 bytes with r/m = 5; flagged
                * RIP-relative whenever bits == 64 */
2132               output->sib_present = false;
2133               output->bytes = (addrbits != 16 ? 4 : 2);
2134               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2135               output->rip = bits == 64;
2136             }
2137         } else {                /* it's an indirection */
2138             int i = input->indexreg, b = input->basereg, s = input->scale;
2139             int32_t o = input->offset, seg = input->segment;
2140             int hb = input->hintbase, ht = input->hinttype;
2141             int t;
2142             int it, bt;
2143             int32_t ix, bx;     /* register flags */
2144
2145             if (s == 0)
2146                 i = -1;         /* make this easy, at least */
2147
             /* it/bt: index/base numbers from nasm_regvals[] (-1 if
              * absent); ix/bx: the matching nasm_reg_flags[] entries
              * (0 if absent) */
2148             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2149                 it = nasm_regvals[i];
2150                 ix = nasm_reg_flags[i];
2151             } else {
2152                 it = -1;
2153                 ix = 0;
2154             }
2155
2156             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2157                 bt = nasm_regvals[b];
2158                 bx = nasm_reg_flags[b];
2159             } else {
2160                 bt = -1;
2161                 bx = 0;
2162             }
2163
2164             /* check for a 32/64-bit memory reference... */
2165             if ((ix|bx) & (BITS32|BITS64)) {
2166                 /* it must be a 32/64-bit memory reference. Firstly we have
2167                  * to check that all registers involved are type E/Rxx. */
                 /* sok starts as BITS32|BITS64 and is narrowed by the
                  * flags of each register present */
2168                 int32_t sok = BITS32|BITS64;
2169
2170                 if (it != -1) {
2171                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2172                         sok &= ix;
2173                     else
2174                         return NULL;
2175                 }
2176
2177                 if (bt != -1) {
2178                     if (REG_GPR & ~bx)
2179                         return NULL; /* Invalid register */
2180                     if (~sok & bx & SIZE_MASK)
2181                         return NULL; /* Invalid size */
2182                     sok &= bx;
2183                 }
2184
2185                 /* While we're here, ensure the user didn't specify
2186                    WORD or QWORD. */
2187                 if (input->disp_size == 16 || input->disp_size == 64)
2188                     return NULL;
2189
                 /* the register sizes left in sok must include the
                  * address size in effect */
2190                 if (addrbits == 16 ||
2191                     (addrbits == 32 && !(sok & BITS32)) ||
2192                     (addrbits == 64 && !(sok & BITS64)))
2193                     return NULL;
2194
2195                 /* now reorganize base/index */
2196                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2197                     ((hb == b && ht == EAH_NOTBASE)
2198                      || (hb == i && ht == EAH_MAKEBASE))) {
2199                     /* swap if hints say so */
2200                     t = bt, bt = it, it = t;
2201                     t = bx, bx = ix, ix = t;
2202                 }
2203                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2204                     bt = -1, bx = 0, s++;
2205                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2206                     /* make single reg base, unless hint */
2207                     bt = it, bx = ix, it = -1, ix = 0;
2208                 }
2209                 if (((s == 2 && it != REG_NUM_ESP
2210                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2211                      || s == 5 || s == 9) && bt == -1)
2212                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2213                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2214                     && (input->eaflags & EAF_TIMESTWO))
2215                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2216                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2217                 if (s == 1 && it == REG_NUM_ESP) {
2218                     /* swap ESP into base if scale is 1 */
2219                     t = it, it = bt, bt = t;
2220                     t = ix, ix = bx, bx = t;
2221                 }
2222                 if (it == REG_NUM_ESP
2223                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2224                     return NULL;        /* wrong, for various reasons */
2225
2226                 output->rex |= rexflags(it, ix, REX_X);
2227                 output->rex |= rexflags(bt, bx, REX_B);
2228
2229                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2230                     /* no SIB needed */
2231                     int mod, rm;
2232
                     /* mod 0 = no displacement, mod 1 = 1 byte,
                      * mod 2 = 4 bytes (see output->bytes below); with
                      * no base at all, r/m = 5 and 4 bytes are used */
2233                     if (bt == -1) {
2234                         rm = 5;
2235                         mod = 0;
2236                     } else {
2237                         rm = (bt & 7);
2238                         if (rm != REG_NUM_EBP && o == 0 &&
2239                                 seg == NO_SEG && !forw_ref &&
2240                                 !(input->eaflags &
2241                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2242                             mod = 0;
2243                         else if (input->eaflags & EAF_BYTEOFFS ||
2244                                  (o >= -128 && o <= 127 && seg == NO_SEG
2245                                   && !forw_ref
2246                                   && !(input->eaflags & EAF_WORDOFFS)))
2247                             mod = 1;
2248                         else
2249                             mod = 2;
2250                     }
2251
2252                     output->sib_present = false;
2253                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2254                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2255                 } else {
2256                     /* we need a SIB */
2257                     int mod, scale, index, base;
2258
2259                     if (it == -1)
2260                         index = 4, s = 1;
2261                     else
2262                         index = (it & 7);
2263
                     /* scale factor 1/2/4/8 becomes SIB scale 0..3 */
2264                     switch (s) {
2265                     case 1:
2266                         scale = 0;
2267                         break;
2268                     case 2:
2269                         scale = 1;
2270                         break;
2271                     case 4:
2272                         scale = 2;
2273                         break;
2274                     case 8:
2275                         scale = 3;
2276                         break;
2277                     default:   /* then what the smeg is it? */
2278                         return NULL;    /* panic */
2279                     }
2280
                     /* same mod selection as the no-SIB case above */
2281                     if (bt == -1) {
2282                         base = 5;
2283                         mod = 0;
2284                     } else {
2285                         base = (bt & 7);
2286                         if (base != REG_NUM_EBP && o == 0 &&
2287                                     seg == NO_SEG && !forw_ref &&
2288                                     !(input->eaflags &
2289                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2290                             mod = 0;
2291                         else if (input->eaflags & EAF_BYTEOFFS ||
2292                                  (o >= -128 && o <= 127 && seg == NO_SEG
2293                                   && !forw_ref
2294                                   && !(input->eaflags & EAF_WORDOFFS)))
2295                             mod = 1;
2296                         else
2297                             mod = 2;
2298                     }
2299
2300                     output->sib_present = true;
2301                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2302                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2303                     output->sib = (scale << 6) | (index << 3) | base;
2304                 }
2305             } else {            /* it's 16-bit */
2306                 int mod, rm;
2307
2308                 /* check for 64-bit long mode */
2309                 if (addrbits == 64)
2310                     return NULL;
2311
2312                 /* check all registers are BX, BP, SI or DI */
2313                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2314                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2315                                        && i != R_SI && i != R_DI))
2316                     return NULL;
2317
2318                 /* ensure the user didn't specify DWORD/QWORD */
2319                 if (input->disp_size == 32 || input->disp_size == 64)
2320                     return NULL;
2321
2322                 if (s != 1 && i != -1)
2323                     return NULL;        /* no can do, in 16-bit EA */
2324                 if (b == -1 && i != -1) {
2325                     int tmp = b;
2326                     b = i;
2327                     i = tmp;
2328                 }               /* swap */
2329                 if ((b == R_SI || b == R_DI) && i != -1) {
2330                     int tmp = b;
2331                     b = i;
2332                     i = tmp;
2333                 }
2334                 /* have BX/BP as base, SI/DI index */
2335                 if (b == i)
2336                     return NULL;        /* shouldn't ever happen, in theory */
2337                 if (i != -1 && b != -1 &&
2338                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2339                     return NULL;        /* invalid combinations */
2340                 if (b == -1)    /* pure offset: handled above */
2341                     return NULL;        /* so if it gets to here, panic! */
2342
                 /* map the index/base pair, or the lone base, onto the
                  * r/m field; rm stays -1 if no case matches */
2343                 rm = -1;
2344                 if (i != -1)
2345                     switch (i * 256 + b) {
2346                     case R_SI * 256 + R_BX:
2347                         rm = 0;
2348                         break;
2349                     case R_DI * 256 + R_BX:
2350                         rm = 1;
2351                         break;
2352                     case R_SI * 256 + R_BP:
2353                         rm = 2;
2354                         break;
2355                     case R_DI * 256 + R_BP:
2356                         rm = 3;
2357                         break;
2358                 } else
2359                     switch (b) {
2360                     case R_SI:
2361                         rm = 4;
2362                         break;
2363                     case R_DI:
2364                         rm = 5;
2365                         break;
2366                     case R_BP:
2367                         rm = 6;
2368                         break;
2369                     case R_BX:
2370                         rm = 7;
2371                         break;
2372                     }
2373                 if (rm == -1)   /* can't happen, in theory */
2374                     return NULL;        /* so panic if it does */
2375
                 /* here mod doubles as the displacement byte count
                  * (0, 1 or 2); rm 6 (BP alone) never takes mod 0 */
2376                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2377                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2378                     mod = 0;
2379                 else if (input->eaflags & EAF_BYTEOFFS ||
2380                          (o >= -128 && o <= 127 && seg == NO_SEG
2381                           && !forw_ref
2382                           && !(input->eaflags & EAF_WORDOFFS)))
2383                     mod = 1;
2384                 else
2385                     mod = 2;
2386
2387                 output->sib_present = false;    /* no SIB - it's 16-bit */
2388                 output->bytes = mod;    /* bytes of offset needed */
2389                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2390             }
2391         }
2392     }
2393
     /* total length: ModRM byte + optional SIB byte + displacement */
2394     output->size = 1 + output->sib_present + output->bytes;
2395     return output;
2396 }
2397
2398 static void add_asp(insn *ins, int addrbits)
2399 {
2400     int j, valid;
2401     int defdisp;
2402
2403     valid = (addrbits == 64) ? 64|32 : 32|16;
2404
2405     switch (ins->prefixes[PPS_ASIZE]) {
2406     case P_A16:
2407         valid &= 16;
2408         break;
2409     case P_A32:
2410         valid &= 32;
2411         break;
2412     case P_A64:
2413         valid &= 64;
2414         break;
2415     case P_ASP:
2416         valid &= (addrbits == 32) ? 16 : 32;
2417         break;
2418     default:
2419         break;
2420     }
2421
2422     for (j = 0; j < ins->operands; j++) {
2423         if (!(MEMORY & ~ins->oprs[j].type)) {
2424             int32_t i, b;
2425
2426             /* Verify as Register */
2427             if (ins->oprs[j].indexreg < EXPR_REG_START
2428                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2429                 i = 0;
2430             else
2431                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2432
2433             /* Verify as Register */
2434             if (ins->oprs[j].basereg < EXPR_REG_START
2435                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2436                 b = 0;
2437             else
2438                 b = nasm_reg_flags[ins->oprs[j].basereg];
2439
2440             if (ins->oprs[j].scale == 0)
2441                 i = 0;
2442
2443             if (!i && !b) {
2444                 int ds = ins->oprs[j].disp_size;
2445                 if ((addrbits != 64 && ds > 8) ||
2446                     (addrbits == 64 && ds == 16))
2447                     valid &= ds;
2448             } else {
2449                 if (!(REG16 & ~b))
2450                     valid &= 16;
2451                 if (!(REG32 & ~b))
2452                     valid &= 32;
2453                 if (!(REG64 & ~b))
2454                     valid &= 64;
2455
2456                 if (!(REG16 & ~i))
2457                     valid &= 16;
2458                 if (!(REG32 & ~i))
2459                     valid &= 32;
2460                 if (!(REG64 & ~i))
2461                     valid &= 64;
2462             }
2463         }
2464     }
2465
2466     if (valid & addrbits) {
2467         ins->addr_size = addrbits;
2468     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2469         /* Add an address size prefix */
2470         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2471         ins->prefixes[PPS_ASIZE] = pref;
2472         ins->addr_size = (addrbits == 32) ? 16 : 32;
2473     } else {
2474         /* Impossible... */
2475         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2476         ins->addr_size = addrbits; /* Error recovery */
2477     }
2478
2479     defdisp = ins->addr_size == 16 ? 16 : 32;
2480
2481     for (j = 0; j < ins->operands; j++) {
2482         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2483             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2484             != ins->addr_size) {
2485             /* mem_offs sizes must match the address size; if not,
2486                strip the MEM_OFFS bit and match only EA instructions */
2487             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2488         }
2489     }
2490 }