assemble.c

   1 /* assemble.c   code generation for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the license given in the file "LICENSE"
   6  * distributed in the NASM archive.
   7  *
   8  * the actual codes (C syntax, i.e. octal):
   9  * \0            - terminates the code. (Unless it's a literal of course.)
  10  * \1..\4        - that many literal bytes follow in the code stream
  11  * \5            - add 4 to the primary operand number (b, low octdigit)
  12  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  13  * \7            - add 4 to both the primary and the secondary operand number
  14  * \10..\13      - a literal byte follows in the code stream, to be added
  15  *                 to the register value of operand 0..3
  16  * \14..\17      - a signed byte immediate operand, from operand 0..3
  17  * \20..\23      - a byte immediate operand, from operand 0..3
  18  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  19  * \30..\33      - a word immediate operand, from operand 0..3
  20  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  21  *                 assembly mode or the operand-size override on the operand
  22  * \40..\43      - a long immediate operand, from operand 0..3
  23  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  24  *                 depending on the address size of the instruction.
  25  * \50..\53      - a byte relative operand, from operand 0..3
  26  * \54..\57      - a qword immediate operand, from operand 0..3
  27  * \60..\63      - a word relative operand, from operand 0..3
  28  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  29  *                 assembly mode or the operand-size override on the operand
  30  * \70..\73      - a long relative operand, from operand 0..3
  31  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  32  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  33  *                 field the register value of operand b.
  34  * \140..\143    - an immediate word or signed byte for operand 0..3
  35  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  36  *                  is a signed byte rather than a word.  Opcode byte follows.
  37  * \150..\153    - an immediate dword or signed byte for operand 0..3
  38  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  39  *                  is a signed byte rather than a dword.  Opcode byte follows.
  40  * \160..\163    - this instruction uses DREX rather than REX, with the
  41  *                 OC0 field set to 0, and the dest field taken from
  42  *                 operand 0..3.
  43  * \164..\167    - this instruction uses DREX rather than REX, with the
  44  *                 OC0 field set to 1, and the dest field taken from
  45  *                 operand 0..3.
  46  * \171          - placement of DREX suffix in the absence of an EA
  47  * \172\ab       - the register number from operand a in bits 7..4, with
  48  *                 the 4-bit immediate from operand b in bits 3..0.
  49  * \173\xab      - the register number from operand a in bits 7..4, with
  50  *                 the value b in bits 3..0.
  51  * \174\a        - the register number from operand a in bits 7..4, and
  52  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  53  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  54  *                 field equal to digit b.
  55  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  56  *                 is not equal to the truncated and sign-extended 32-bit
  57  *                 operand; used for 32-bit immediates in 64-bit mode.
  58  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  59  * \260..\263    - this instruction uses VEX rather than REX, with the
  60  *                 V field taken from operand 0..3.
  61  * \270          - this instruction uses VEX rather than REX, with the
  62  *                 V field set to 1111b.
  63  *
  64  * VEX prefixes are followed by the sequence:
  65  * \mm\wlp         where mm is the M field; and wlp is:
  66  *                 00 0ww lpp
  67  *                 [w0] ww = 0 for W = 0
  68  *                 [w1] ww = 1 for W = 1
  69  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  70  *                 [ww] ww = 3 for W used as REX.W
  71  *
  72  *
  73  * \274..\277    - a signed byte immediate operand, from operand 0..3,
  74  *                 which is to be extended to the operand size.
  75  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  76  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  77  * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
  78  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  79  * \314          - (disassembler only) invalid with REX.B
  80  * \315          - (disassembler only) invalid with REX.X
  81  * \316          - (disassembler only) invalid with REX.R
  82  * \317          - (disassembler only) invalid with REX.W
  83  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
  84  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
  85  * \322          - indicates that this instruction is only valid when the
  86  *                 operand size is the default (instruction to disassembler,
  87  *                 generates no code in the assembler)
  88  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
  89  * \324          - indicates 64-bit operand size requiring REX prefix.
  90  * \330          - a literal byte follows in the code stream, to be added
  91  *                 to the condition code value of the instruction.
  92  * \331          - instruction not valid with REP prefix.  Hint for
  93  *                 disassembler only; for SSE instructions.
  94  * \332          - REP prefix (0xF2 byte) used as opcode extension.
  95  * \333          - REP prefix (0xF3 byte) used as opcode extension.
  96  * \334          - LOCK prefix used instead of REX.R
  97  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
   98  * \336          - force a REP(E) prefix (0xF3) even if not specified.
   99  * \337          - force a REPNE prefix (0xF2) even if not specified.
 100  *                 \336-\337 are still listed as prefixes in the disassembler.
 101  * \340          - reserve <operand 0> bytes of uninitialized storage.
 102  *                 Operand 0 had better be a segmentless constant.
 103  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 104  *                 (POP is never used for CS) depending on operand 0
 105  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 106  *                 on operand 0
 107  * \360          - no SSE prefix (== \364\331)
 108  * \361          - 66 SSE prefix (== \366\331)
 109  * \362          - F2 SSE prefix (== \364\332)
 110  * \363          - F3 SSE prefix (== \364\333)
 111  * \364          - operand-size prefix (0x66) not permitted
 112  * \365          - address-size prefix (0x67) not permitted
 113  * \366          - operand-size prefix (0x66) used as opcode extension
 114  * \367          - address-size prefix (0x67) used as opcode extension
 115  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 116  *                 370 is used for Jcc, 371 is used for JMP.
 117  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 118  *                 used for conditional jump over longer jump
 119  */
 120
 121 #include "compiler.h"
 122
 123 #include <stdio.h>
 124 #include <string.h>
 125 #include <inttypes.h>
 126
 127 #include "nasm.h"
 128 #include "nasmlib.h"
 129 #include "assemble.h"
 130 #include "insns.h"
 131 #include "tables.h"
 132
 133 /* Initialized to zero by the C standard */
 134 static const uint8_t const_zero_buf[256];
 135
 136 typedef struct {
 137     int sib_present;                 /* is a SIB byte necessary? */
 138     int bytes;                       /* # of bytes of offset needed */
 139     int size;                        /* lazy - this is sib+bytes+1 */
 140     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 141 } ea;
 142
 143 static uint32_t cpu;            /* cpu level received from nasm.c */
 144 static efunc errfunc;
 145 static struct ofmt *outfmt;
 146 static ListGen *list;
 147
 148 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 149 static void gencode(int32_t segment, int64_t offset, int bits,
 150                     insn * ins, const struct itemplate *temp,
 151                     int64_t insn_end);
 152 static int matches(const struct itemplate *, insn *, int bits);
 153 static int32_t regflag(const operand *);
 154 static int32_t regval(const operand *);
 155 static int rexflags(int, int32_t, int);
 156 static int op_rexflags(const operand *, int);
 157 static ea *process_ea(operand *, ea *, int, int, int, int32_t);
 158 static void add_asp(insn *, int);
 159
 160 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 161 {
 162     return ins->prefixes[pos] == prefix;
 163 }
 164
 165 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 166 {
 167     if (ins->prefixes[pos])
 168         errfunc(ERR_NONFATAL, "invalid %s prefix",
 169                 prefix_name(ins->prefixes[pos]));
 170 }
 171
 172 static const char *size_name(int size)
 173 {
 174     switch (size) {
 175     case 1:
 176         return "byte";
 177     case 2:
 178         return "word";
 179     case 4:
 180         return "dword";
 181     case 8:
 182         return "qword";
 183     case 10:
 184         return "tword";
 185     case 16:
 186         return "oword";
 187     case 32:
 188         return "yword";
 189     default:
 190         return "???";
 191     }
 192 }
 193
 194 static void warn_overflow(int size, const struct operand *o)
 195 {
 196     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 197         int64_t lim = ((int64_t)1 << (size*8))-1;
 198         int64_t data = o->offset;
 199
 200         if (data < ~lim || data > lim)
 201             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 202                     "%s data exceeds bounds", size_name(size));
 203     }
 204 }
 205 /*
  206  * This routine wraps the real output format's output routine,
 207  * in order to pass a copy of the data off to the listing file
 208  * generator at the same time.
 209  */
 210 static void out(int64_t offset, int32_t segto, const void *data,
 211                 enum out_type type, uint64_t size,
 212                 int32_t segment, int32_t wrt)
 213 {
 214     static int32_t lineno = 0;     /* static!!! */
 215     static char *lnfname = NULL;
 216     uint8_t p[8];
 217
 218     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 219         /*
 220          * This is a non-relocated address, and we're going to
 221          * convert it into RAWDATA format.
 222          */
 223         uint8_t *q = p;
 224
 225         if (size > 8) {
 226             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 227             return;
 228         }
 229
 230         WRITEADDR(q, *(int64_t *)data, size);
 231         data = p;
 232         type = OUT_RAWDATA;
 233     }
 234
 235     list->output(offset, data, type, size);
 236
 237     /*
 238      * this call to src_get determines when we call the
 239      * debug-format-specific "linenum" function
 240      * it updates lineno and lnfname to the current values
 241      * returning 0 if "same as last time", -2 if lnfname
 242      * changed, and the amount by which lineno changed,
 243      * if it did. thus, these variables must be static
 244      */
 245
 246     if (src_get(&lineno, &lnfname)) {
 247         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 248     }
 249
 250     outfmt->output(segto, data, type, size, segment, wrt);
 251 }
 252
 253 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 254                      insn * ins, const uint8_t *code)
 255 {
 256     int64_t isize;
 257     uint8_t c = code[0];
 258
 259     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 260         return false;
 261     if (!optimizing)
 262         return false;
 263     if (optimizing < 0 && c == 0371)
 264         return false;
 265
 266     isize = calcsize(segment, offset, bits, ins, code);
 267     if (ins->oprs[0].segment != segment)
 268         return false;
 269
 270     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 271     return (isize >= -128 && isize <= 127); /* is it byte size? */
 272 }
 273
 274 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 275               insn * instruction, struct ofmt *output, efunc error,
 276               ListGen * listgen)
 277 {
 278     const struct itemplate *temp;
 279     int j;
 280     int size_prob;
 281     int64_t insn_end;
 282     int32_t itimes;
 283     int64_t start = offset;
 284     int64_t wsize = 0;             /* size for DB etc. */
 285
 286     errfunc = error;            /* to pass to other functions */
 287     cpu = cp;
 288     outfmt = output;            /* likewise */
 289     list = listgen;             /* and again */
 290
 291     switch (instruction->opcode) {
 292     case -1:
 293         return 0;
 294     case I_DB:
 295         wsize = 1;
 296         break;
 297     case I_DW:
 298         wsize = 2;
 299         break;
 300     case I_DD:
 301         wsize = 4;
 302         break;
 303     case I_DQ:
 304         wsize = 8;
 305         break;
 306     case I_DT:
 307         wsize = 10;
 308         break;
 309     case I_DO:
 310         wsize = 16;
 311         break;
 312     case I_DY:
 313         wsize = 32;
 314         break;
 315     default:
 316         break;
 317     }
 318
 319     if (wsize) {
 320         extop *e;
 321         int32_t t = instruction->times;
 322         if (t < 0)
 323             errfunc(ERR_PANIC,
 324                     "instruction->times < 0 (%ld) in assemble()", t);
 325
 326         while (t--) {           /* repeat TIMES times */
 327             for (e = instruction->eops; e; e = e->next) {
 328                 if (e->type == EOT_DB_NUMBER) {
 329                     if (wsize == 1) {
 330                         if (e->segment != NO_SEG)
 331                             errfunc(ERR_NONFATAL,
 332                                     "one-byte relocation attempted");
 333                         else {
 334                             uint8_t out_byte = e->offset;
 335                             out(offset, segment, &out_byte,
 336                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 337                         }
 338                     } else if (wsize > 8) {
 339                         errfunc(ERR_NONFATAL,
 340                                 "integer supplied to a DT, DO or DY"
 341                                 " instruction");
 342                     } else
 343                         out(offset, segment, &e->offset,
 344                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 345                     offset += wsize;
 346                 } else if (e->type == EOT_DB_STRING ||
 347                            e->type == EOT_DB_STRING_FREE) {
 348                     int align;
 349
 350                     out(offset, segment, e->stringval,
 351                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 352                     align = e->stringlen % wsize;
 353
 354                     if (align) {
 355                         align = wsize - align;
 356                         out(offset, segment, const_zero_buf,
 357                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 358                     }
 359                     offset += e->stringlen + align;
 360                 }
 361             }
 362             if (t > 0 && t == instruction->times - 1) {
 363                 /*
 364                  * Dummy call to list->output to give the offset to the
 365                  * listing module.
 366                  */
 367                 list->output(offset, NULL, OUT_RAWDATA, 0);
 368                 list->uplevel(LIST_TIMES);
 369             }
 370         }
 371         if (instruction->times > 1)
 372             list->downlevel(LIST_TIMES);
 373         return offset - start;
 374     }
 375
 376     if (instruction->opcode == I_INCBIN) {
 377         const char *fname = instruction->eops->stringval;
 378         FILE *fp;
 379
 380         fp = fopen(fname, "rb");
 381         if (!fp) {
 382             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 383                   fname);
 384         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 385             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 386                   fname);
 387         } else {
 388             static char buf[4096];
 389             size_t t = instruction->times;
 390             size_t base = 0;
 391             size_t len;
 392
 393             len = ftell(fp);
 394             if (instruction->eops->next) {
 395                 base = instruction->eops->next->offset;
 396                 len -= base;
 397                 if (instruction->eops->next->next &&
 398                     len > (size_t)instruction->eops->next->next->offset)
 399                     len = (size_t)instruction->eops->next->next->offset;
 400             }
 401             /*
 402              * Dummy call to list->output to give the offset to the
 403              * listing module.
 404              */
 405             list->output(offset, NULL, OUT_RAWDATA, 0);
 406             list->uplevel(LIST_INCBIN);
 407             while (t--) {
 408                 size_t l;
 409
 410                 fseek(fp, base, SEEK_SET);
 411                 l = len;
 412                 while (l > 0) {
 413                     int32_t m =
 414                         fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
 415                               fp);
 416                     if (!m) {
 417                         /*
 418                          * This shouldn't happen unless the file
 419                          * actually changes while we are reading
 420                          * it.
 421                          */
 422                         error(ERR_NONFATAL,
 423                               "`incbin': unexpected EOF while"
 424                               " reading file `%s'", fname);
 425                         t = 0;  /* Try to exit cleanly */
 426                         break;
 427                     }
 428                     out(offset, segment, buf, OUT_RAWDATA, m,
 429                         NO_SEG, NO_SEG);
 430                     l -= m;
 431                 }
 432             }
 433             list->downlevel(LIST_INCBIN);
 434             if (instruction->times > 1) {
 435                 /*
 436                  * Dummy call to list->output to give the offset to the
 437                  * listing module.
 438                  */
 439                 list->output(offset, NULL, OUT_RAWDATA, 0);
 440                 list->uplevel(LIST_TIMES);
 441                 list->downlevel(LIST_TIMES);
 442             }
 443             fclose(fp);
 444             return instruction->times * len;
 445         }
 446         return 0;               /* if we're here, there's an error */
 447     }
 448
 449     /* Check to see if we need an address-size prefix */
 450     add_asp(instruction, bits);
 451
 452     size_prob = false;
 453
 454     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 455         int m = matches(temp, instruction, bits);
 456         if (m == 100 ||
 457             (m == 99 && jmp_match(segment, offset, bits,
 458                                   instruction, temp->code))) {
 459             /* Matches! */
 460             int64_t insn_size = calcsize(segment, offset, bits,
 461                                       instruction, temp->code);
 462             itimes = instruction->times;
 463             if (insn_size < 0)  /* shouldn't be, on pass two */
 464                 error(ERR_PANIC, "errors made it through from pass one");
 465             else
 466                 while (itimes--) {
 467                     for (j = 0; j < MAXPREFIX; j++) {
 468                         uint8_t c = 0;
 469                         switch (instruction->prefixes[j]) {
 470                         case P_LOCK:
 471                             c = 0xF0;
 472                             break;
 473                         case P_REPNE:
 474                         case P_REPNZ:
 475                             c = 0xF2;
 476                             break;
 477                         case P_REPE:
 478                         case P_REPZ:
 479                         case P_REP:
 480                             c = 0xF3;
 481                             break;
 482                         case R_CS:
 483                             if (bits == 64) {
 484                                 error(ERR_WARNING | ERR_PASS2,
 485                                       "cs segment base generated, but will be ignored in 64-bit mode");
 486                             }
 487                             c = 0x2E;
 488                             break;
 489                         case R_DS:
 490                             if (bits == 64) {
 491                                 error(ERR_WARNING | ERR_PASS2,
 492                                       "ds segment base generated, but will be ignored in 64-bit mode");
 493                             }
 494                             c = 0x3E;
 495                             break;
 496                         case R_ES:
 497                            if (bits == 64) {
 498                                 error(ERR_WARNING | ERR_PASS2,
 499                                       "es segment base generated, but will be ignored in 64-bit mode");
 500                            }
 501                             c = 0x26;
 502                             break;
 503                         case R_FS:
 504                             c = 0x64;
 505                             break;
 506                         case R_GS:
 507                             c = 0x65;
 508                             break;
 509                         case R_SS:
 510                             if (bits == 64) {
 511                                 error(ERR_WARNING | ERR_PASS2,
 512                                       "ss segment base generated, but will be ignored in 64-bit mode");
 513                             }
 514                             c = 0x36;
 515                             break;
 516                         case R_SEGR6:
 517                         case R_SEGR7:
 518                             error(ERR_NONFATAL,
 519                                   "segr6 and segr7 cannot be used as prefixes");
 520                             break;
 521                         case P_A16:
 522                             if (bits == 64) {
 523                                 error(ERR_NONFATAL,
 524                                       "16-bit addressing is not supported "
 525                                       "in 64-bit mode");
 526                             } else if (bits != 16)
 527                                 c = 0x67;
 528                             break;
 529                         case P_A32:
 530                             if (bits != 32)
 531                                 c = 0x67;
 532                             break;
 533                         case P_A64:
 534                             if (bits != 64) {
 535                                 error(ERR_NONFATAL,
 536                                       "64-bit addressing is only supported "
 537                                       "in 64-bit mode");
 538                             }
 539                             break;
 540                         case P_ASP:
 541                             c = 0x67;
 542                             break;
 543                         case P_O16:
 544                             if (bits != 16)
 545                                 c = 0x66;
 546                             break;
 547                         case P_O32:
 548                             if (bits == 16)
 549                                 c = 0x66;
 550                             break;
 551                         case P_O64:
 552                             /* REX.W */
 553                             break;
 554                         case P_OSP:
 555                             c = 0x66;
 556                             break;
 557                         case P_none:
 558                             break;
 559                         default:
 560                             error(ERR_PANIC, "invalid instruction prefix");
 561                         }
 562                         if (c != 0) {
 563                             out(offset, segment, &c, OUT_RAWDATA, 1,
 564                                 NO_SEG, NO_SEG);
 565                             offset++;
 566                         }
 567                     }
 568                     insn_end = offset + insn_size;
 569                     gencode(segment, offset, bits, instruction,
 570                             temp, insn_end);
 571                     offset += insn_size;
 572                     if (itimes > 0 && itimes == instruction->times - 1) {
 573                         /*
 574                          * Dummy call to list->output to give the offset to the
 575                          * listing module.
 576                          */
 577                         list->output(offset, NULL, OUT_RAWDATA, 0);
 578                         list->uplevel(LIST_TIMES);
 579                     }
 580                 }
 581             if (instruction->times > 1)
 582                 list->downlevel(LIST_TIMES);
 583             return offset - start;
 584         } else if (m > 0 && m > size_prob) {
 585             size_prob = m;
 586         }
 587     }
 588
 589     if (temp->opcode == -1) {   /* didn't match any instruction */
 590         switch (size_prob) {
 591         case 1:
 592             error(ERR_NONFATAL, "operation size not specified");
 593             break;
 594         case 2:
 595             error(ERR_NONFATAL, "mismatch in operand sizes");
 596             break;
 597         case 3:
 598             error(ERR_NONFATAL, "no instruction for this cpu level");
 599             break;
 600         case 4:
 601             error(ERR_NONFATAL, "instruction not supported in 64-bit mode");
 602             break;
 603         default:
 604             error(ERR_NONFATAL,
 605                   "invalid combination of opcode and operands");
 606             break;
 607         }
 608     }
 609     return 0;
 610 }
 611
 612 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 613                insn * instruction, efunc error)
 614 {
 615     const struct itemplate *temp;
 616
 617     errfunc = error;            /* to pass to other functions */
 618     cpu = cp;
 619
 620     if (instruction->opcode == -1)
 621         return 0;
 622
 623     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 624         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 625         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 626         instruction->opcode == I_DY) {
 627         extop *e;
 628         int32_t isize, osize, wsize = 0;   /* placate gcc */
 629
 630         isize = 0;
 631         switch (instruction->opcode) {
 632         case I_DB:
 633             wsize = 1;
 634             break;
 635         case I_DW:
 636             wsize = 2;
 637             break;
 638         case I_DD:
 639             wsize = 4;
 640             break;
 641         case I_DQ:
 642             wsize = 8;
 643             break;
 644         case I_DT:
 645             wsize = 10;
 646             break;
 647         case I_DO:
 648             wsize = 16;
 649             break;
 650         case I_DY:
 651             wsize = 32;
 652             break;
 653         default:
 654             break;
 655         }
 656
 657         for (e = instruction->eops; e; e = e->next) {
 658             int32_t align;
 659
 660             osize = 0;
 661             if (e->type == EOT_DB_NUMBER)
 662                 osize = 1;
 663             else if (e->type == EOT_DB_STRING ||
 664                      e->type == EOT_DB_STRING_FREE)
 665                 osize = e->stringlen;
 666
 667             align = (-osize) % wsize;
 668             if (align < 0)
 669                 align += wsize;
 670             isize += osize + align;
 671         }
 672         return isize * instruction->times;
 673     }
 674
 675     if (instruction->opcode == I_INCBIN) {
 676         const char *fname = instruction->eops->stringval;
 677         FILE *fp;
 678         size_t len;
 679
 680         fp = fopen(fname, "rb");
 681         if (!fp)
 682             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 683                   fname);
 684         else if (fseek(fp, 0L, SEEK_END) < 0)
 685             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 686                   fname);
 687         else {
 688             len = ftell(fp);
 689             fclose(fp);
 690             if (instruction->eops->next) {
 691                 len -= instruction->eops->next->offset;
 692                 if (instruction->eops->next->next &&
 693                     len > (size_t)instruction->eops->next->next->offset) {
 694                     len = (size_t)instruction->eops->next->next->offset;
 695                 }
 696             }
 697             return instruction->times * len;
 698         }
 699         return 0;               /* if we're here, there's an error */
 700     }
 701
 702     /* Check to see if we need an address-size prefix */
 703     add_asp(instruction, bits);
 704
 705     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 706         int m = matches(temp, instruction, bits);
 707         if (m == 100 ||
 708             (m == 99 && jmp_match(segment, offset, bits,
 709                                   instruction, temp->code))) {
 710             /* we've matched an instruction. */
 711             int64_t isize;
 712             const uint8_t *codes = temp->code;
 713             int j;
 714
 715             isize = calcsize(segment, offset, bits, instruction, codes);
 716             if (isize < 0)
 717                 return -1;
 718             for (j = 0; j < MAXPREFIX; j++) {
 719                 switch (instruction->prefixes[j]) {
 720                 case P_A16:
 721                     if (bits != 16)
 722                         isize++;
 723                     break;
 724                 case P_A32:
 725                     if (bits != 32)
 726                         isize++;
 727                     break;
 728                 case P_O16:
 729                     if (bits != 16)
 730                         isize++;
 731                     break;
 732                 case P_O32:
 733                     if (bits == 16)
 734                         isize++;
 735                     break;
 736                 case P_A64:
 737                 case P_O64:
 738                 case P_none:
 739                     break;
 740                 default:
 741                     isize++;
 742                     break;
 743                 }
 744             }
 745             return isize * instruction->times;
 746         }
 747     }
 748     return -1;                  /* didn't match any instruction */
 749 }
 750
 751 static bool possible_sbyte(operand *o)
 752 {
 753     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 754         !(o->opflags & OPFLAG_FORWARD) &&
 755         optimizing >= 0 && !(o->type & STRICT);
 756 }
 757
 758 /* check that opn[op]  is a signed byte of size 16 or 32 */
 759 static bool is_sbyte16(operand *o)
 760 {
 761     int16_t v;
 762
 763     if (!possible_sbyte(o))
 764         return false;
 765
 766     v = o->offset;
 767     return v >= -128 && v <= 127;
 768 }
 769
 770 static bool is_sbyte32(operand *o)
 771 {
 772     int32_t v;
 773
 774     if (!possible_sbyte(o))
 775         return false;
 776
 777     v = o->offset;
 778     return v >= -128 && v <= 127;
 779 }
 780
 781 /* Common construct */
 782 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 783
 784 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 785                         insn * ins, const uint8_t *codes)
 786 {
 787     int64_t length = 0;
 788     uint8_t c;
 789     int rex_mask = ~0;
 790     int op1, op2;
 791     struct operand *opx;
 792     uint8_t opex = 0;
 793
 794     ins->rex = 0;               /* Ensure REX is reset */
 795
 796     if (ins->prefixes[PPS_OSIZE] == P_O64)
 797         ins->rex |= REX_W;
 798
 799     (void)segment;              /* Don't warn that this parameter is unused */
 800     (void)offset;               /* Don't warn that this parameter is unused */
 801
 802     while (*codes) {
 803         c = *codes++;
 804         op1 = (c & 3) + ((opex & 1) << 2);
 805         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 806         opx = &ins->oprs[op1];
 807         opex = 0;               /* For the next iteration */
 808
 809         switch (c) {
 810         case 01:
 811         case 02:
 812         case 03:
 813         case 04:
 814             codes += c, length += c;
 815             break;
 816
 817         case 05:
 818         case 06:
 819         case 07:
 820             opex = c;
 821             break;
 822
 823         case4(010):
 824             ins->rex |=
 825                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 826             codes++, length++;
 827             break;
 828
 829         case4(014):
 830         case4(020):
 831         case4(024):
 832             length++;
 833             break;
 834
 835         case4(030):
 836             length += 2;
 837             break;
 838
 839         case4(034):
 840             if (opx->type & (BITS16 | BITS32 | BITS64))
 841                 length += (opx->type & BITS16) ? 2 : 4;
 842             else
 843                 length += (bits == 16) ? 2 : 4;
 844             break;
 845
 846         case4(040):
 847             length += 4;
 848             break;
 849
 850         case4(044):
 851             length += ins->addr_size >> 3;
 852             break;
 853
 854         case4(050):
 855             length++;
 856             break;
 857
 858         case4(054):
 859             length += 8; /* MOV reg64/imm */
 860             break;
 861
 862         case4(060):
 863             length += 2;
 864             break;
 865
 866         case4(064):
 867             if (opx->type & (BITS16 | BITS32 | BITS64))
 868                 length += (opx->type & BITS16) ? 2 : 4;
 869             else
 870                 length += (bits == 16) ? 2 : 4;
 871             break;
 872
 873         case4(070):
 874             length += 4;
 875             break;
 876
 877         case4(074):
 878             length += 2;
 879             break;
 880
 881         case4(0140):
 882             length += is_sbyte16(opx) ? 1 : 2;
 883             break;
 884
 885         case4(0144):
 886             codes++;
 887             length++;
 888             break;
 889
 890         case4(0150):
 891             length += is_sbyte32(opx) ? 1 : 4;
 892             break;
 893
 894         case4(0154):
 895             codes++;
 896             length++;
 897             break;
 898
 899         case4(0160):
 900             length++;
 901             ins->rex |= REX_D;
 902             ins->drexdst = regval(opx);
 903             break;
 904
 905         case4(0164):
 906             length++;
 907             ins->rex |= REX_D|REX_OC;
 908             ins->drexdst = regval(opx);
 909             break;
 910
 911         case 0171:
 912             break;
 913
 914         case 0172:
 915         case 0173:
 916         case 0174:
 917             codes++;
 918             length++;
 919             break;
 920
 921         case4(0250):
 922             length += is_sbyte32(opx) ? 1 : 4;
 923             break;
 924
 925         case4(0254):
 926             length += 4;
 927             break;
 928
 929         case4(0260):
 930             ins->rex |= REX_V;
 931             ins->drexdst = regval(opx);
 932             ins->vex_m = *codes++;
 933             ins->vex_wlp = *codes++;
 934             break;
 935
 936         case 0270:
 937             ins->rex |= REX_V;
 938             ins->drexdst = 0;
 939             ins->vex_m = *codes++;
 940             ins->vex_wlp = *codes++;
 941             break;
 942
 943         case4(0274):
 944             length++;
 945             break;
 946
 947         case4(0300):
 948             break;
 949
 950         case 0310:
 951             if (bits == 64)
 952                 return -1;
 953             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 954             break;
 955
 956         case 0311:
 957             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 958             break;
 959
 960         case 0312:
 961             break;
 962
 963         case 0313:
 964             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 965                 has_prefix(ins, PPS_ASIZE, P_A32))
 966                 return -1;
 967             break;
 968
 969         case4(0314):
 970             break;
 971
 972         case 0320:
 973             length += (bits != 16);
 974             break;
 975
 976         case 0321:
 977             length += (bits == 16);
 978             break;
 979
 980         case 0322:
 981             break;
 982
 983         case 0323:
 984             rex_mask &= ~REX_W;
 985             break;
 986
 987         case 0324:
 988             ins->rex |= REX_W;
 989             break;
 990
 991         case 0330:
 992             codes++, length++;
 993             break;
 994
 995         case 0331:
 996             break;
 997
 998         case 0332:
 999         case 0333:
1000             length++;
1001             break;
1002
1003         case 0334:
1004             ins->rex |= REX_L;
1005             break;
1006
1007         case 0335:
1008             break;
1009
1010         case 0336:
1011             if (!ins->prefixes[PPS_LREP])
1012                 ins->prefixes[PPS_LREP] = P_REP;
1013             break;
1014
1015         case 0337:
1016             if (!ins->prefixes[PPS_LREP])
1017                 ins->prefixes[PPS_LREP] = P_REPNE;
1018             break;
1019
1020         case 0340:
1021             if (ins->oprs[0].segment != NO_SEG)
1022                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1023                         " quantity of BSS space");
1024             else
1025                 length += ins->oprs[0].offset;
1026             break;
1027
1028         case4(0344):
1029             length++;
1030             break;
1031
1032         case 0360:
1033             break;
1034
1035         case 0361:
1036         case 0362:
1037         case 0363:
1038             length++;
1039             break;
1040
1041         case 0364:
1042         case 0365:
1043             break;
1044
1045         case 0366:
1046         case 0367:
1047             length++;
1048             break;
1049
1050         case 0370:
1051         case 0371:
1052         case 0372:
1053             break;
1054
1055         case 0373:
1056             length++;
1057             break;
1058
1059         case4(0100):
1060         case4(0110):
1061         case4(0120):
1062         case4(0130):
1063         case4(0200):
1064         case4(0204):
1065         case4(0210):
1066         case4(0214):
1067         case4(0220):
1068         case4(0224):
1069         case4(0230):
1070         case4(0234):
1071             {
1072                 ea ea_data;
1073                 int rfield;
1074                 int32_t rflags;
1075                 struct operand *opy = &ins->oprs[op2];
1076
1077                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1078
1079                 if (c <= 0177) {
1080                     /* pick rfield from operand b (opx) */
1081                     rflags = regflag(opx);
1082                     rfield = nasm_regvals[opx->basereg];
1083                 } else {
1084                     rflags = 0;
1085                     rfield = c & 7;
1086                 }
1087                 if (!process_ea(opy, &ea_data, bits,
1088                                 ins->addr_size, rfield, rflags)) {
1089                     errfunc(ERR_NONFATAL, "invalid effective address");
1090                     return -1;
1091                 } else {
1092                     ins->rex |= ea_data.rex;
1093                     length += ea_data.size;
1094                 }
1095             }
1096             break;
1097
1098         default:
1099             errfunc(ERR_PANIC, "internal instruction table corrupt"
1100                     ": instruction code 0x%02X given", c);
1101             break;
1102         }
1103     }
1104
1105     ins->rex &= rex_mask;
1106
1107     if (ins->rex & REX_V) {
1108         int bad32 = REX_R|REX_W|REX_X|REX_B;
1109
1110         if (ins->rex & REX_H) {
1111             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1112             return -1;
1113         }
1114         switch (ins->vex_wlp & 030) {
1115         case 000:
1116         case 020:
1117             ins->rex &= ~REX_W;
1118             break;
1119         case 010:
1120             ins->rex |= REX_W;
1121             bad32 &= ~REX_W;
1122             break;
1123         case 030:
1124             /* Follow REX_W */
1125             break;
1126         }
1127
1128         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1129             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1130             return -1;
1131         }
1132         if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1133             length += 3;
1134         else
1135             length += 2;
1136     } else if (ins->rex & REX_D) {
1137         if (ins->rex & REX_H) {
1138             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1139             return -1;
1140         }
1141         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1142                            ins->drexdst > 7)) {
1143             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1144             return -1;
1145         }
1146         length++;
1147     } else if (ins->rex & REX_REAL) {
1148         if (ins->rex & REX_H) {
1149             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1150             return -1;
1151         } else if (bits == 64) {
1152             length++;
1153         } else if ((ins->rex & REX_L) &&
1154                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1155                    cpu >= IF_X86_64) {
1156             /* LOCK-as-REX.R */
1157             assert_no_prefix(ins, PPS_LREP);
1158             length++;
1159         } else {
1160             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1161             return -1;
1162         }
1163     }
1164
1165     return length;
1166 }
1167
/*
 * EMIT_REX(): output a pending REX prefix byte, if any.
 *
 * Expands in a scope that provides `ins', `bits', `offset' and
 * `segment'.  When no DREX/VEX flag is set, at least one REX_REAL bit
 * is set and the mode is 64-bit, the byte (REX_REAL bits plus REX_P)
 * is written with out(), offset is advanced by one and ins->rex is
 * cleared so that a later EMIT_REX() emits nothing.
 *
 * Wrapped in do { } while (0) so that "EMIT_REX();" is exactly one
 * statement and cannot capture a following `else'.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&     \
            (bits == 64)) {                                             \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1175
1176 static void gencode(int32_t segment, int64_t offset, int bits,
1177                     insn * ins, const struct itemplate *temp,
1178                     int64_t insn_end)
1179 {
1180     static char condval[] = {   /* conditional opcodes */
1181         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1182         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1183         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1184     };
1185     uint8_t c;
1186     uint8_t bytes[4];
1187     int64_t size;
1188     int64_t data;
1189     int op1, op2;
1190     struct operand *opx;
1191     const uint8_t *codes = temp->code;
1192     uint8_t opex = 0;
1193
1194     while (*codes) {
1195         c = *codes++;
1196         op1 = (c & 3) + ((opex & 1) << 2);
1197         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1198         opx = &ins->oprs[op1];
1199         opex = 0;               /* For the next iteration */
1200
1201         switch (c) {
1202         case 01:
1203         case 02:
1204         case 03:
1205         case 04:
1206             EMIT_REX();
1207             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1208             codes += c;
1209             offset += c;
1210             break;
1211
1212         case 05:
1213         case 06:
1214         case 07:
1215             opex = c;
1216             break;
1217
1218         case4(010):
1219             EMIT_REX();
1220             bytes[0] = *codes++ + (regval(opx) & 7);
1221             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1222             offset += 1;
1223             break;
1224
1225         case4(014):
1226             /* The test for BITS8 and SBYTE here is intended to avoid
1227                warning on optimizer actions due to SBYTE, while still
1228                warn on explicit BYTE directives.  Also warn, obviously,
1229                if the optimizer isn't enabled. */
1230             if (((opx->type & BITS8) ||
1231                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1232                 (opx->offset < -128 || opx->offset > 127)) {
1233                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1234                         "signed byte value exceeds bounds");
1235             }
1236             if (opx->segment != NO_SEG) {
1237                 data = opx->offset;
1238                 out(offset, segment, &data, OUT_ADDRESS, 1,
1239                     opx->segment, opx->wrt);
1240             } else {
1241                 bytes[0] = opx->offset;
1242                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1243                     NO_SEG);
1244             }
1245             offset += 1;
1246             break;
1247
1248         case4(020):
1249             if (opx->offset < -256 || opx->offset > 255) {
1250                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1251                         "byte value exceeds bounds");
1252             }
1253             if (opx->segment != NO_SEG) {
1254                 data = opx->offset;
1255                 out(offset, segment, &data, OUT_ADDRESS, 1,
1256                     opx->segment, opx->wrt);
1257             } else {
1258                 bytes[0] = opx->offset;
1259                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1260                     NO_SEG);
1261             }
1262             offset += 1;
1263             break;
1264
1265         case4(024):
1266             if (opx->offset < 0 || opx->offset > 255)
1267                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1268                         "unsigned byte value exceeds bounds");
1269             if (opx->segment != NO_SEG) {
1270                 data = opx->offset;
1271                 out(offset, segment, &data, OUT_ADDRESS, 1,
1272                     opx->segment, opx->wrt);
1273             } else {
1274                 bytes[0] = opx->offset;
1275                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1276                     NO_SEG);
1277             }
1278             offset += 1;
1279             break;
1280
1281         case4(030):
1282             warn_overflow(2, opx);
1283             data = opx->offset;
1284             out(offset, segment, &data, OUT_ADDRESS, 2,
1285                 opx->segment, opx->wrt);
1286             offset += 2;
1287             break;
1288
1289         case4(034):
1290             if (opx->type & (BITS16 | BITS32))
1291                 size = (opx->type & BITS16) ? 2 : 4;
1292             else
1293                 size = (bits == 16) ? 2 : 4;
1294             warn_overflow(size, opx);
1295             data = opx->offset;
1296             out(offset, segment, &data, OUT_ADDRESS, size,
1297                 opx->segment, opx->wrt);
1298             offset += size;
1299             break;
1300
1301         case4(040):
1302             warn_overflow(4, opx);
1303             data = opx->offset;
1304             out(offset, segment, &data, OUT_ADDRESS, 4,
1305                 opx->segment, opx->wrt);
1306             offset += 4;
1307             break;
1308
1309         case4(044):
1310             data = opx->offset;
1311             size = ins->addr_size >> 3;
1312             warn_overflow(size, opx);
1313             out(offset, segment, &data, OUT_ADDRESS, size,
1314                 opx->segment, opx->wrt);
1315             offset += size;
1316             break;
1317
1318         case4(050):
1319             if (opx->segment != segment)
1320                 errfunc(ERR_NONFATAL,
1321                         "short relative jump outside segment");
1322             data = opx->offset - insn_end;
1323             if (data > 127 || data < -128)
1324                 errfunc(ERR_NONFATAL, "short jump is out of range");
1325             bytes[0] = data;
1326             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1327             offset += 1;
1328             break;
1329
1330         case4(054):
1331             data = (int64_t)opx->offset;
1332             out(offset, segment, &data, OUT_ADDRESS, 8,
1333                 opx->segment, opx->wrt);
1334             offset += 8;
1335             break;
1336
1337         case4(060):
1338             if (opx->segment != segment) {
1339                 data = opx->offset;
1340                 out(offset, segment, &data,
1341                     OUT_REL2ADR, insn_end - offset,
1342                     opx->segment, opx->wrt);
1343             } else {
1344                 data = opx->offset - insn_end;
1345                 out(offset, segment, &data,
1346                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1347             }
1348             offset += 2;
1349             break;
1350
1351         case4(064):
1352             if (opx->type & (BITS16 | BITS32 | BITS64))
1353                 size = (opx->type & BITS16) ? 2 : 4;
1354             else
1355                 size = (bits == 16) ? 2 : 4;
1356             if (opx->segment != segment) {
1357                 data = opx->offset;
1358                 out(offset, segment, &data,
1359                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1360                     insn_end - offset, opx->segment, opx->wrt);
1361             } else {
1362                 data = opx->offset - insn_end;
1363                 out(offset, segment, &data,
1364                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1365             }
1366             offset += size;
1367             break;
1368
1369         case4(070):
1370             if (opx->segment != segment) {
1371                 data = opx->offset;
1372                 out(offset, segment, &data,
1373                     OUT_REL4ADR, insn_end - offset,
1374                     opx->segment, opx->wrt);
1375             } else {
1376                 data = opx->offset - insn_end;
1377                 out(offset, segment, &data,
1378                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1379             }
1380             offset += 4;
1381             break;
1382
1383         case4(074):
1384             if (opx->segment == NO_SEG)
1385                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1386                         " relocatable");
1387             data = 0;
1388             out(offset, segment, &data, OUT_ADDRESS, 2,
1389                 outfmt->segbase(1 + opx->segment),
1390                 opx->wrt);
1391             offset += 2;
1392             break;
1393
1394         case4(0140):
1395             data = opx->offset;
1396             warn_overflow(2, opx);
1397             if (is_sbyte16(opx)) {
1398                 bytes[0] = data;
1399                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1400                     NO_SEG);
1401                 offset++;
1402             } else {
1403                 out(offset, segment, &data, OUT_ADDRESS, 2,
1404                     opx->segment, opx->wrt);
1405                 offset += 2;
1406             }
1407             break;
1408
1409         case4(0144):
1410             EMIT_REX();
1411             bytes[0] = *codes++;
1412             if (is_sbyte16(opx))
1413                 bytes[0] |= 2;  /* s-bit */
1414             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1415             offset++;
1416             break;
1417
1418         case4(0150):
1419             data = opx->offset;
1420             warn_overflow(4, opx);
1421             if (is_sbyte32(opx)) {
1422                 bytes[0] = data;
1423                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1424                     NO_SEG);
1425                 offset++;
1426             } else {
1427                 out(offset, segment, &data, OUT_ADDRESS, 4,
1428                     opx->segment, opx->wrt);
1429                 offset += 4;
1430             }
1431             break;
1432
1433         case4(0154):
1434             EMIT_REX();
1435             bytes[0] = *codes++;
1436             if (is_sbyte32(opx))
1437                 bytes[0] |= 2;  /* s-bit */
1438             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1439             offset++;
1440             break;
1441
1442         case4(0160):
1443         case4(0164):
1444             break;
1445
1446         case 0171:
1447             bytes[0] =
1448                 (ins->drexdst << 4) |
1449                 (ins->rex & REX_OC ? 0x08 : 0) |
1450                 (ins->rex & (REX_R|REX_X|REX_B));
1451             ins->rex = 0;
1452             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1453             offset++;
1454             break;
1455
1456         case 0172:
1457             c = *codes++;
1458             opx = &ins->oprs[c >> 3];
1459             bytes[0] = nasm_regvals[opx->basereg] << 4;
1460             opx = &ins->oprs[c & 7];
1461             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1462                 errfunc(ERR_NONFATAL,
1463                         "non-absolute expression not permitted as argument %d",
1464                         c & 7);
1465             } else {
1466                 if (opx->offset & ~15) {
1467                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1468                             "four-bit argument exceeds bounds");
1469                 }
1470                 bytes[0] |= opx->offset & 15;
1471             }
1472             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1473             offset++;
1474             break;
1475
1476         case 0173:
1477             c = *codes++;
1478             opx = &ins->oprs[c >> 4];
1479             bytes[0] = nasm_regvals[opx->basereg] << 4;
1480             bytes[0] |= c & 15;
1481             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1482             offset++;
1483             break;
1484
1485         case 0174:
1486             c = *codes++;
1487             opx = &ins->oprs[c];
1488             bytes[0] = nasm_regvals[opx->basereg] << 4;
1489             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1490             offset++;
1491             break;
1492
1493         case4(0250):
1494             data = opx->offset;
1495             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1496                 (int32_t)data != (int64_t)data) {
1497                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1498                         "signed dword immediate exceeds bounds");
1499             }
1500             if (is_sbyte32(opx)) {
1501                 bytes[0] = data;
1502                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1503                     NO_SEG);
1504                 offset++;
1505             } else {
1506                 out(offset, segment, &data, OUT_ADDRESS, 4,
1507                     opx->segment, opx->wrt);
1508                 offset += 4;
1509             }
1510             break;
1511
1512         case4(0254):
1513             data = opx->offset;
1514             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1515                 (int32_t)data != (int64_t)data) {
1516                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1517                         "signed dword immediate exceeds bounds");
1518             }
1519             out(offset, segment, &data, OUT_ADDRESS, 4,
1520                 opx->segment, opx->wrt);
1521             offset += 4;
1522             break;
1523
1524         case4(0260):
1525         case 0270:
1526             codes += 2;
1527             if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1528                 bytes[0] = 0xc4;
1529                 bytes[1] = ins->vex_m | ((~ins->rex & 7) << 5);
1530                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1531                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1532                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1533                 offset += 3;
1534             } else {
1535                 bytes[0] = 0xc5;
1536                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1537                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1538                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1539                 offset += 2;
1540             }
1541             break;
1542
1543         case4(0274):
1544         {
1545             uint64_t uv, um;
1546             int s;
1547
1548             if (ins->rex & REX_W)
1549                 s = 64;
1550             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1551                 s = 16;
1552             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1553                 s = 32;
1554             else
1555                 s = bits;
1556
1557             um = (uint64_t)2 << (s-1);
1558             uv = opx->offset;
1559
1560             if (uv > 127 && uv < (uint64_t)-128 &&
1561                 (uv < um-128 || uv > um-1)) {
1562                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1563                         "signed byte value exceeds bounds");
1564             }
1565             if (opx->segment != NO_SEG) {
1566                 data = uv;
1567                 out(offset, segment, &data, OUT_ADDRESS, 1,
1568                     opx->segment, opx->wrt);
1569             } else {
1570                 bytes[0] = uv;
1571                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1572                     NO_SEG);
1573             }
1574             offset += 1;
1575             break;
1576         }
1577
1578         case4(0300):
1579             break;
1580
1581         case 0310:
1582             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1583                 *bytes = 0x67;
1584                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1585                 offset += 1;
1586             } else
1587                 offset += 0;
1588             break;
1589
1590         case 0311:
1591             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1592                 *bytes = 0x67;
1593                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1594                 offset += 1;
1595             } else
1596                 offset += 0;
1597             break;
1598
1599         case 0312:
1600             break;
1601
1602         case 0313:
1603             ins->rex = 0;
1604             break;
1605
1606         case4(0314):
1607             break;
1608
1609         case 0320:
1610             if (bits != 16) {
1611                 *bytes = 0x66;
1612                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1613                 offset += 1;
1614             } else
1615                 offset += 0;
1616             break;
1617
1618         case 0321:
1619             if (bits == 16) {
1620                 *bytes = 0x66;
1621                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1622                 offset += 1;
1623             } else
1624                 offset += 0;
1625             break;
1626
1627         case 0322:
1628         case 0323:
1629             break;
1630
1631         case 0324:
1632             ins->rex |= REX_W;
1633             break;
1634
1635         case 0330:
1636             *bytes = *codes++ ^ condval[ins->condition];
1637             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1638             offset += 1;
1639             break;
1640
1641         case 0331:
1642             break;
1643
1644         case 0332:
1645         case 0333:
1646             *bytes = c - 0332 + 0xF2;
1647             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1648             offset += 1;
1649             break;
1650
1651         case 0334:
1652             if (ins->rex & REX_R) {
1653                 *bytes = 0xF0;
1654                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1655                 offset += 1;
1656             }
1657             ins->rex &= ~(REX_L|REX_R);
1658             break;
1659
1660         case 0335:
1661             break;
1662
1663         case 0336:
1664         case 0337:
1665             break;
1666
1667         case 0340:
1668             if (ins->oprs[0].segment != NO_SEG)
1669                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1670             else {
1671                 int64_t size = ins->oprs[0].offset;
1672                 if (size > 0)
1673                     out(offset, segment, NULL,
1674                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1675                 offset += size;
1676             }
1677             break;
1678
1679         case 0344:
1680         case 0345:
1681             bytes[0] = c & 1;
1682             switch (ins->oprs[0].basereg) {
1683             case R_CS:
1684                 bytes[0] += 0x0E;
1685                 break;
1686             case R_DS:
1687                 bytes[0] += 0x1E;
1688                 break;
1689             case R_ES:
1690                 bytes[0] += 0x06;
1691                 break;
1692             case R_SS:
1693                 bytes[0] += 0x16;
1694                 break;
1695             default:
1696                 errfunc(ERR_PANIC,
1697                         "bizarre 8086 segment register received");
1698             }
1699             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1700             offset++;
1701             break;
1702
1703         case 0346:
1704         case 0347:
1705             bytes[0] = c & 1;
1706             switch (ins->oprs[0].basereg) {
1707             case R_FS:
1708                 bytes[0] += 0xA0;
1709                 break;
1710             case R_GS:
1711                 bytes[0] += 0xA8;
1712                 break;
1713             default:
1714                 errfunc(ERR_PANIC,
1715                         "bizarre 386 segment register received");
1716             }
1717             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1718             offset++;
1719             break;
1720
1721         case 0360:
1722             break;
1723
1724         case 0361:
1725             bytes[0] = 0x66;
1726             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1727             offset += 1;
1728             break;
1729
1730         case 0362:
1731         case 0363:
1732             bytes[0] = c - 0362 + 0xf2;
1733             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1734             offset += 1;
1735             break;
1736
1737         case 0364:
1738         case 0365:
1739             break;
1740
1741         case 0366:
1742         case 0367:
1743             *bytes = c - 0366 + 0x66;
1744             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1745             offset += 1;
1746             break;
1747
1748         case 0370:
1749         case 0371:
1750         case 0372:
1751             break;
1752
1753         case 0373:
1754             *bytes = bits == 16 ? 3 : 5;
1755             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1756             offset += 1;
1757             break;
1758
1759         case4(0100):
1760         case4(0110):
1761         case4(0120):
1762         case4(0130):
1763         case4(0200):
1764         case4(0204):
1765         case4(0210):
1766         case4(0214):
1767         case4(0220):
1768         case4(0224):
1769         case4(0230):
1770         case4(0234):
1771             {
1772                 ea ea_data;
1773                 int rfield;
1774                 int32_t rflags;
1775                 uint8_t *p;
1776                 int32_t s;
1777                 enum out_type type;
1778                 struct operand *opy = &ins->oprs[op2];
1779
1780                 if (c <= 0177) {
1781                     /* pick rfield from operand b (opx) */
1782                     rflags = regflag(opx);
1783                     rfield = nasm_regvals[opx->basereg];
1784                 } else {
1785                     /* rfield is constant */
1786                     rflags = 0;
1787                     rfield = c & 7;
1788                 }
1789
1790                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1791                                 rfield, rflags)) {
1792                     errfunc(ERR_NONFATAL, "invalid effective address");
1793                 }
1794
1795
1796                 p = bytes;
1797                 *p++ = ea_data.modrm;
1798                 if (ea_data.sib_present)
1799                     *p++ = ea_data.sib;
1800
1801                 /* DREX suffixes come between the SIB and the displacement */
1802                 if (ins->rex & REX_D) {
1803                     *p++ = (ins->drexdst << 4) |
1804                            (ins->rex & REX_OC ? 0x08 : 0) |
1805                            (ins->rex & (REX_R|REX_X|REX_B));
1806                     ins->rex = 0;
1807                 }
1808
1809                 s = p - bytes;
1810                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1811
1812                 /*
1813                  * Make sure the address gets the right offset in case
1814                  * the line breaks in the .lst file (BR 1197827)
1815                  */
1816                 offset += s;
1817                 s = 0;
1818
1819                 switch (ea_data.bytes) {
1820                 case 0:
1821                     break;
1822                 case 1:
1823                     if (opy->segment != NO_SEG) {
1824                         data = opy->offset;
1825                         out(offset, segment, &data, OUT_ADDRESS, 1,
1826                             opy->segment, opy->wrt);
1827                     } else {
1828                         *bytes = opy->offset;
1829                         out(offset, segment, bytes, OUT_RAWDATA, 1,
1830                             NO_SEG, NO_SEG);
1831                     }
1832                     s++;
1833                     break;
1834                 case 8:
1835                 case 2:
1836                 case 4:
1837                     data = opy->offset;
1838                     warn_overflow(ea_data.bytes, opy);
1839                     s += ea_data.bytes;
1840                     if (ea_data.rip) {
1841                         if (opy->segment == segment) {
1842                             data -= insn_end;
1843                             out(offset, segment, &data, OUT_ADDRESS,
1844                                 ea_data.bytes, NO_SEG, NO_SEG);
1845                         } else {
1846                             out(offset, segment, &data, OUT_REL4ADR,
1847                                 insn_end - offset, opy->segment, opy->wrt);
1848                         }
1849                     } else {
1850                         type = OUT_ADDRESS;
1851                         out(offset, segment, &data, OUT_ADDRESS,
1852                             ea_data.bytes, opy->segment, opy->wrt);
1853                     }
1854                     break;
1855                 }
1856                 offset += s;
1857             }
1858             break;
1859
1860         default:
1861             errfunc(ERR_PANIC, "internal instruction table corrupt"
1862                     ": instruction code 0x%02X given", c);
1863             break;
1864         }
1865     }
1866 }
1867
1868 static int32_t regflag(const operand * o)
1869 {
1870     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1871         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1872     }
1873     return nasm_reg_flags[o->basereg];
1874 }
1875
1876 static int32_t regval(const operand * o)
1877 {
1878     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1879         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1880     }
1881     return nasm_regvals[o->basereg];
1882 }
1883
1884 static int op_rexflags(const operand * o, int mask)
1885 {
1886     int32_t flags;
1887     int val;
1888
1889     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1890         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1891     }
1892
1893     flags = nasm_reg_flags[o->basereg];
1894     val = nasm_regvals[o->basereg];
1895
1896     return rexflags(val, flags, mask);
1897 }
1898
1899 static int rexflags(int val, int32_t flags, int mask)
1900 {
1901     int rex = 0;
1902
1903     if (val >= 8)
1904         rex |= REX_B|REX_X|REX_R;
1905     if (flags & BITS64)
1906         rex |= REX_W;
1907     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1908         rex |= REX_H;
1909     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1910         rex |= REX_P;
1911
1912     return rex & mask;
1913 }
1914
1915 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1916 {
1917     int i, size[MAX_OPERANDS], asize, oprs, ret;
1918
1919     ret = 100;
1920
1921     /*
1922      * Check the opcode
1923      */
1924     if (itemp->opcode != instruction->opcode)
1925         return 0;
1926
1927     /*
1928      * Count the operands
1929      */
1930     if (itemp->operands != instruction->operands)
1931         return 0;
1932
1933     /*
1934      * Check that no spurious colons or TOs are present
1935      */
1936     for (i = 0; i < itemp->operands; i++)
1937         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1938             return 0;
1939
1940     /*
1941      * Process size flags
1942      */
1943     if (itemp->flags & IF_ARMASK) {
1944         memset(size, 0, sizeof size);
1945
1946         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
1947
1948         switch (itemp->flags & IF_SMASK) {
1949         case IF_SB:
1950             size[i] = BITS8;
1951             break;
1952         case IF_SW:
1953             size[i] = BITS16;
1954             break;
1955         case IF_SD:
1956             size[i] = BITS32;
1957             break;
1958         case IF_SQ:
1959             size[i] = BITS64;
1960             break;
1961         case IF_SO:
1962             size[i] = BITS128;
1963             break;
1964         case IF_SY:
1965             size[i] = BITS256;
1966             break;
1967         case IF_SZ:
1968             switch (bits) {
1969             case 16:
1970                 size[i] = BITS16;
1971                 break;
1972             case 32:
1973                 size[i] = BITS32;
1974                 break;
1975             case 64:
1976                 size[i] = BITS64;
1977                 break;
1978             }
1979             break;
1980         default:
1981             break;
1982         }
1983     } else {
1984         asize = 0;
1985         switch (itemp->flags & IF_SMASK) {
1986         case IF_SB:
1987             asize = BITS8;
1988             break;
1989         case IF_SW:
1990             asize = BITS16;
1991             break;
1992         case IF_SD:
1993             asize = BITS32;
1994             break;
1995         case IF_SQ:
1996             asize = BITS64;
1997             break;
1998         case IF_SO:
1999             asize = BITS128;
2000             break;
2001         case IF_SY:
2002             asize = BITS256;
2003             break;
2004         case IF_SZ:
2005             switch (bits) {
2006             case 16:
2007                 asize = BITS16;
2008                 break;
2009             case 32:
2010                 asize = BITS32;
2011                 break;
2012             case 64:
2013                 asize = BITS64;
2014                 break;
2015             }
2016             break;
2017         default:
2018             break;
2019         }
2020         for (i = 0; i < MAX_OPERANDS; i++)
2021             size[i] = asize;
2022     }
2023
2024     /*
2025      * Check that the operand flags all match up
2026      */
2027     for (i = 0; i < itemp->operands; i++) {
2028         int32_t type = instruction->oprs[i].type;
2029         if (!(type & SIZE_MASK))
2030             type |= size[i];
2031
2032         if (itemp->opd[i] & SAME_AS) {
2033             int j = itemp->opd[i] & ~SAME_AS;
2034             if (type != instruction->oprs[j].type ||
2035                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2036                 return 0;
2037         } else if (itemp->opd[i] & ~type ||
2038             ((itemp->opd[i] & SIZE_MASK) &&
2039              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2040             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2041                 (type & SIZE_MASK))
2042                 return 0;
2043             else
2044                 return 1;
2045         }
2046     }
2047
2048     /*
2049      * Check operand sizes
2050      */
2051     if (itemp->flags & (IF_SM | IF_SM2)) {
2052         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2053         asize = 0;
2054         for (i = 0; i < oprs; i++) {
2055             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2056                 int j;
2057                 for (j = 0; j < oprs; j++)
2058                     size[j] = asize;
2059                 break;
2060             }
2061         }
2062     } else {
2063         oprs = itemp->operands;
2064     }
2065
2066     for (i = 0; i < itemp->operands; i++) {
2067         if (!(itemp->opd[i] & SIZE_MASK) &&
2068             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2069             return 2;
2070     }
2071
2072     /*
2073      * Check template is okay at the set cpu level
2074      */
2075     if (((itemp->flags & IF_PLEVEL) > cpu))
2076         return 3;
2077
2078     /*
2079      * Check if instruction is available in long mode
2080      */
2081     if ((itemp->flags & IF_NOLONG) && (bits == 64))
2082         return 4;
2083
2084     /*
2085      * Check if special handling needed for Jumps
2086      */
2087     if ((uint8_t)(itemp->code[0]) >= 0370)
2088         return 99;
2089
2090     return ret;
2091 }
2092
/*
 * process_ea: work out how to encode the operand `input' as an effective
 * address and describe the result in `*output'.
 *
 *   input    - operand to encode (register-direct or memory reference)
 *   output   - receives modrm, sib / sib_present, bytes (displacement
 *              length), rip and size.  REX bits are OR-ed into
 *              output->rex, which is never cleared here, so the caller
 *              has to initialise it.
 *   bits     - assembly mode; only tested against 64 in this function
 *   addrbits - address size the EA has to fit (compared with 16, 32, 64)
 *   rfield   - value for the reg field (bits 3..5) of the ModRM byte
 *   rflags   - register flags belonging to rfield, used for its REX bits
 *
 * Returns `output' on success, or NULL if the operand cannot be encoded
 * as an effective address.
 */
2093 static ea *process_ea(operand * input, ea * output, int bits,
2094                       int addrbits, int rfield, int32_t rflags)
2095 {
2096     bool forw_ref = !!(input->opflags & OPFLAG_FORWARD);
2097
2098     output->rip = false;
2099
2100     /* REX flags for the rfield operand */
2101     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2102
2103     if (!(REGISTER & ~input->type)) {   /* register direct */
2104         int i;
2105         int32_t f;
2106
2107         if (input->basereg < EXPR_REG_START /* Verify as Register */
2108             || input->basereg >= REG_ENUM_LIMIT)
2109             return NULL;
2110         f = regflag(input);
2111         i = nasm_regvals[input->basereg];
2112
2113         if (REG_EA & ~f)
2114             return NULL;        /* Invalid EA register */
2115
2116         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2117
2118         output->sib_present = false;             /* no SIB necessary */
2119         output->bytes = 0;  /* no offset necessary either */
2120         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2121     } else {                    /* it's a memory reference */
2122         if (input->basereg == -1
2123             && (input->indexreg == -1 || input->scale == 0)) {
2124             /* it's a pure offset */
                 /*
                  * In 64-bit mode without IP_REL the offset is encoded
                  * through a SIB byte with index 4 (no index) and base 5
                  * (no base), followed by a 4-byte displacement.
                  * Otherwise a bare 2- or 4-byte displacement is used,
                  * and it is flagged as RIP-relative when bits == 64.
                  */
2125             if (bits == 64 && (~input->type & IP_REL)) {
2126               int scale, index, base;
2127               output->sib_present = true;
2128               scale = 0;
2129               index = 4;
2130               base = 5;
2131               output->sib = (scale << 6) | (index << 3) | base;
2132               output->bytes = 4;
2133               output->modrm = 4 | ((rfield & 7) << 3);
2134               output->rip = false;
2135             } else {
2136               output->sib_present = false;
2137               output->bytes = (addrbits != 16 ? 4 : 2);
2138               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2139               output->rip = bits == 64;
2140             }
2141         } else {                /* it's an indirection */
2142             int i = input->indexreg, b = input->basereg, s = input->scale;
2143             int32_t o = input->offset, seg = input->segment;
2144             int hb = input->hintbase, ht = input->hinttype;
2145             int t;
2146             int it, bt;
2147             int32_t ix, bx;     /* register flags */
2148
2149             if (s == 0)
2150                 i = -1;         /* make this easy, at least */
2151
                 /* it/bt: register numbers of index/base, -1 when absent */
2152             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2153                 it = nasm_regvals[i];
2154                 ix = nasm_reg_flags[i];
2155             } else {
2156                 it = -1;
2157                 ix = 0;
2158             }
2159
2160             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2161                 bt = nasm_regvals[b];
2162                 bx = nasm_reg_flags[b];
2163             } else {
2164                 bt = -1;
2165                 bx = 0;
2166             }
2167
2168             /* check for a 32/64-bit memory reference... */
2169             if ((ix|bx) & (BITS32|BITS64)) {
2170                 /* it must be a 32/64-bit memory reference. Firstly we have
2171                  * to check that all registers involved are type E/Rxx. */
                     /* sok: the size bits every register seen so far allows */
2172                 int32_t sok = BITS32|BITS64;
2173
2174                 if (it != -1) {
2175                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2176                         sok &= ix;
2177                     else
2178                         return NULL;
2179                 }
2180
2181                 if (bt != -1) {
2182                     if (REG_GPR & ~bx)
2183                         return NULL; /* Invalid register */
2184                     if (~sok & bx & SIZE_MASK)
2185                         return NULL; /* Invalid size */
2186                     sok &= bx;
2187                 }
2188
2189                 /* While we're here, ensure the user didn't specify
2190                    WORD or QWORD. */
2191                 if (input->disp_size == 16 || input->disp_size == 64)
2192                     return NULL;
2193
                     /* the registers must be usable at the requested address size */
2194                 if (addrbits == 16 ||
2195                     (addrbits == 32 && !(sok & BITS32)) ||
2196                     (addrbits == 64 && !(sok & BITS64)))
2197                     return NULL;
2198
2199                 /* now reorganize base/index */
2200                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2201                     ((hb == b && ht == EAH_NOTBASE)
2202                      || (hb == i && ht == EAH_MAKEBASE))) {
2203                     /* swap if hints say so */
2204                     t = bt, bt = it, it = t;
2205                     t = bx, bx = ix, ix = t;
2206                 }
2207                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2208                     bt = -1, bx = 0, s++;
2209                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2210                     /* make single reg base, unless hint */
2211                     bt = it, bx = ix, it = -1, ix = 0;
2212                 }
2213                 if (((s == 2 && it != REG_NUM_ESP
2214                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2215                      || s == 5 || s == 9) && bt == -1)
2216                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
                     /*
                      * A lone base register other than ESP with EAF_TIMESTWO
                      * set is moved into the index slot with scale 1, which
                      * makes the code below emit a SIB byte for it.
                      */
2217                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2218                     && (input->eaflags & EAF_TIMESTWO))
2219                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2220                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2221                 if (s == 1 && it == REG_NUM_ESP) {
2222                     /* swap ESP into base if scale is 1 */
2223                     t = it, it = bt, bt = t;
2224                     t = ix, ix = bx, bx = t;
2225                 }
2226                 if (it == REG_NUM_ESP
2227                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2228                     return NULL;        /* wrong, for various reasons */
2229
2230                 output->rex |= rexflags(it, ix, REX_X);
2231                 output->rex |= rexflags(bt, bx, REX_B);
2232
2233                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2234                     /* no SIB needed */
2235                     int mod, rm;
2236
2237                     if (bt == -1) {
2238                         rm = 5;
2239                         mod = 0;
2240                     } else {
2241                         rm = (bt & 7);
                             /*
                              * mod 0: no displacement, mod 1: one byte,
                              * mod 2: full size.  Segment-relative or
                              * forward-referenced offsets never use the
                              * short forms unless EAF_BYTEOFFS forces one.
                              */
2242                         if (rm != REG_NUM_EBP && o == 0 &&
2243                                 seg == NO_SEG && !forw_ref &&
2244                                 !(input->eaflags &
2245                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2246                             mod = 0;
2247                         else if (input->eaflags & EAF_BYTEOFFS ||
2248                                  (o >= -128 && o <= 127 && seg == NO_SEG
2249                                   && !forw_ref
2250                                   && !(input->eaflags & EAF_WORDOFFS)))
2251                             mod = 1;
2252                         else
2253                             mod = 2;
2254                     }
2255
2256                     output->sib_present = false;
                         /* 4 displacement bytes without a base or with mod 2,
                            otherwise mod (0 or 1) bytes */
2257                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2258                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2259                 } else {
2260                     /* we need a SIB */
2261                     int mod, scale, index, base;
2262
2263                     if (it == -1)
2264                         index = 4, s = 1;
2265                     else
2266                         index = (it & 7);
2267
                         /* map the scale factor onto the two-bit SIB scale field */
2268                     switch (s) {
2269                     case 1:
2270                         scale = 0;
2271                         break;
2272                     case 2:
2273                         scale = 1;
2274                         break;
2275                     case 4:
2276                         scale = 2;
2277                         break;
2278                     case 8:
2279                         scale = 3;
2280                         break;
2281                     default:   /* then what the smeg is it? */
2282                         return NULL;    /* panic */
2283                     }
2284
2285                     if (bt == -1) {
2286                         base = 5;
2287                         mod = 0;
2288                     } else {
2289                         base = (bt & 7);
2290                         if (base != REG_NUM_EBP && o == 0 &&
2291                                     seg == NO_SEG && !forw_ref &&
2292                                     !(input->eaflags &
2293                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2294                             mod = 0;
2295                         else if (input->eaflags & EAF_BYTEOFFS ||
2296                                  (o >= -128 && o <= 127 && seg == NO_SEG
2297                                   && !forw_ref
2298                                   && !(input->eaflags & EAF_WORDOFFS)))
2299                             mod = 1;
2300                         else
2301                             mod = 2;
2302                     }
2303
2304                     output->sib_present = true;
2305                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2306                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2307                     output->sib = (scale << 6) | (index << 3) | base;
2308                 }
2309             } else {            /* it's 16-bit */
2310                 int mod, rm;
2311
2312                 /* check for 64-bit long mode */
2313                 if (addrbits == 64)
2314                     return NULL;
2315
2316                 /* check all registers are BX, BP, SI or DI */
2317                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2318                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2319                                        && i != R_SI && i != R_DI))
2320                     return NULL;
2321
2322                 /* ensure the user didn't specify DWORD/QWORD */
2323                 if (input->disp_size == 32 || input->disp_size == 64)
2324                     return NULL;
2325
2326                 if (s != 1 && i != -1)
2327                     return NULL;        /* no can do, in 16-bit EA */
2328                 if (b == -1 && i != -1) {
2329                     int tmp = b;
2330                     b = i;
2331                     i = tmp;
2332                 }               /* swap */
2333                 if ((b == R_SI || b == R_DI) && i != -1) {
2334                     int tmp = b;
2335                     b = i;
2336                     i = tmp;
2337                 }
2338                 /* have BX/BP as base, SI/DI index */
2339                 if (b == i)
2340                     return NULL;        /* shouldn't ever happen, in theory */
2341                 if (i != -1 && b != -1 &&
2342                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2343                     return NULL;        /* invalid combinations */
2344                 if (b == -1)    /* pure offset: handled above */
2345                     return NULL;        /* so if it gets to here, panic! */
2346
                     /* pick the r/m value for the index+base pair or the single base */
2347                 rm = -1;
2348                 if (i != -1)
2349                     switch (i * 256 + b) {
2350                     case R_SI * 256 + R_BX:
2351                         rm = 0;
2352                         break;
2353                     case R_DI * 256 + R_BX:
2354                         rm = 1;
2355                         break;
2356                     case R_SI * 256 + R_BP:
2357                         rm = 2;
2358                         break;
2359                     case R_DI * 256 + R_BP:
2360                         rm = 3;
2361                         break;
2362                 } else
2363                     switch (b) {
2364                     case R_SI:
2365                         rm = 4;
2366                         break;
2367                     case R_DI:
2368                         rm = 5;
2369                         break;
2370                     case R_BP:
2371                         rm = 6;
2372                         break;
2373                     case R_BX:
2374                         rm = 7;
2375                         break;
2376                     }
2377                 if (rm == -1)   /* can't happen, in theory */
2378                     return NULL;        /* so panic if it does */
2379
                     /* rm 6 (BP alone) is never encoded without a displacement */
2380                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2381                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2382                     mod = 0;
2383                 else if (input->eaflags & EAF_BYTEOFFS ||
2384                          (o >= -128 && o <= 127 && seg == NO_SEG
2385                           && !forw_ref
2386                           && !(input->eaflags & EAF_WORDOFFS)))
2387                     mod = 1;
2388                 else
2389                     mod = 2;
2390
2391                 output->sib_present = false;    /* no SIB - it's 16-bit */
2392                 output->bytes = mod;    /* bytes of offset needed */
2393                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2394             }
2395         }
2396     }
2397
         /* total length: the ModRM byte, the optional SIB byte, the displacement */
2398     output->size = 1 + output->sib_present + output->bytes;
2399     return output;
2400 }
2401
2402 static void add_asp(insn *ins, int addrbits)
2403 {
2404     int j, valid;
2405     int defdisp;
2406
2407     valid = (addrbits == 64) ? 64|32 : 32|16;
2408
2409     switch (ins->prefixes[PPS_ASIZE]) {
2410     case P_A16:
2411         valid &= 16;
2412         break;
2413     case P_A32:
2414         valid &= 32;
2415         break;
2416     case P_A64:
2417         valid &= 64;
2418         break;
2419     case P_ASP:
2420         valid &= (addrbits == 32) ? 16 : 32;
2421         break;
2422     default:
2423         break;
2424     }
2425
2426     for (j = 0; j < ins->operands; j++) {
2427         if (!(MEMORY & ~ins->oprs[j].type)) {
2428             int32_t i, b;
2429
2430             /* Verify as Register */
2431             if (ins->oprs[j].indexreg < EXPR_REG_START
2432                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2433                 i = 0;
2434             else
2435                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2436
2437             /* Verify as Register */
2438             if (ins->oprs[j].basereg < EXPR_REG_START
2439                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2440                 b = 0;
2441             else
2442                 b = nasm_reg_flags[ins->oprs[j].basereg];
2443
2444             if (ins->oprs[j].scale == 0)
2445                 i = 0;
2446
2447             if (!i && !b) {
2448                 int ds = ins->oprs[j].disp_size;
2449                 if ((addrbits != 64 && ds > 8) ||
2450                     (addrbits == 64 && ds == 16))
2451                     valid &= ds;
2452             } else {
2453                 if (!(REG16 & ~b))
2454                     valid &= 16;
2455                 if (!(REG32 & ~b))
2456                     valid &= 32;
2457                 if (!(REG64 & ~b))
2458                     valid &= 64;
2459
2460                 if (!(REG16 & ~i))
2461                     valid &= 16;
2462                 if (!(REG32 & ~i))
2463                     valid &= 32;
2464                 if (!(REG64 & ~i))
2465                     valid &= 64;
2466             }
2467         }
2468     }
2469
2470     if (valid & addrbits) {
2471         ins->addr_size = addrbits;
2472     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2473         /* Add an address size prefix */
2474         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2475         ins->prefixes[PPS_ASIZE] = pref;
2476         ins->addr_size = (addrbits == 32) ? 16 : 32;
2477     } else {
2478         /* Impossible... */
2479         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2480         ins->addr_size = addrbits; /* Error recovery */
2481     }
2482
2483     defdisp = ins->addr_size == 16 ? 16 : 32;
2484
2485     for (j = 0; j < ins->operands; j++) {
2486         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2487             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2488             != ins->addr_size) {
2489             /* mem_offs sizes must match the address size; if not,
2490                strip the MEM_OFFS bit and match only EA instructions */
2491             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2492         }
2493     }
2494 }