assemble.c

   1 /* assemble.c   code generation for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the license given in the file "LICENSE"
   6  * distributed in the NASM archive.
   7  *
   8  * the actual codes (C syntax, i.e. octal):
   9  * \0            - terminates the code. (Unless it's a literal of course.)
  10  * \1..\4        - that many literal bytes follow in the code stream
  11  * \5            - add 4 to the primary operand number (b, low octdigit)
  12  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  13  * \7            - add 4 to both the primary and the secondary operand number
  14  * \10..\13      - a literal byte follows in the code stream, to be added
  15  *                 to the register value of operand 0..3
  16  * \14..\17      - a signed byte immediate operand, from operand 0..3
  17  * \20..\23      - a byte immediate operand, from operand 0..3
  18  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  19  * \30..\33      - a word immediate operand, from operand 0..3
  20  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  21  *                 assembly mode or the operand-size override on the operand
  22  * \40..\43      - a long immediate operand, from operand 0..3
  23  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  24  *                 depending on the address size of the instruction.
  25  * \50..\53      - a byte relative operand, from operand 0..3
  26  * \54..\57      - a qword immediate operand, from operand 0..3
  27  * \60..\63      - a word relative operand, from operand 0..3
  28  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  29  *                 assembly mode or the operand-size override on the operand
  30  * \70..\73      - a long relative operand, from operand 0..3
  31  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  32  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  33  *                 field the register value of operand b.
  34  * \140..\143    - an immediate word or signed byte for operand 0..3
  35  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  36  *                  is a signed byte rather than a word.  Opcode byte follows.
  37  * \150..\153    - an immediate dword or signed byte for operand 0..3
  38  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  39  *                  is a signed byte rather than a dword.  Opcode byte follows.
  40  * \160..\163    - this instruction uses DREX rather than REX, with the
  41  *                 OC0 field set to 0, and the dest field taken from
  42  *                 operand 0..3.
  43  * \164..\167    - this instruction uses DREX rather than REX, with the
  44  *                 OC0 field set to 1, and the dest field taken from
  45  *                 operand 0..3.
  46  * \171          - placement of DREX suffix in the absence of an EA
  47  * \172\ab       - the register number from operand a in bits 7..4, with
  48  *                 the 4-bit immediate from operand b in bits 3..0.
  49  * \173\xab      - the register number from operand a in bits 7..4, with
  50  *                 the value b in bits 3..0.
  51  * \174\a        - the register number from operand a in bits 7..4, and
  52  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  53  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  54  *                 field equal to digit b.
  55  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  56  *                 is not equal to the truncated and sign-extended 32-bit
  57  *                 operand; used for 32-bit immediates in 64-bit mode.
  58  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  59  * \260..\263    - this instruction uses VEX rather than REX, with the
  60  *                 V field taken from operand 0..3.
  61  * \270          - this instruction uses VEX rather than REX, with the
  62  *                 V field set to 1111b.
  63  *
  64  * VEX prefixes are followed by the sequence:
  65  * \mm\wlp         where mm is the M field; and wlp is:
  66  *                 00 0ww lpp
  67  *                 [w0] ww = 0 for W = 0
  68  *                 [w1] ww = 1 for W = 1
  69  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  70  *                 [ww] ww = 3 for W used as REX.W
  71  *
  72  *
  73  * \274..\277    - a signed byte immediate operand, from operand 0..3,
  74  *                 which is to be extended to the operand size.
  75  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  76  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  77  * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
  78  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  79  * \314          - (disassembler only) invalid with REX.B
  80  * \315          - (disassembler only) invalid with REX.X
  81  * \316          - (disassembler only) invalid with REX.R
  82  * \317          - (disassembler only) invalid with REX.W
  83  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
  84  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
  85  * \322          - indicates that this instruction is only valid when the
  86  *                 operand size is the default (instruction to disassembler,
  87  *                 generates no code in the assembler)
  88  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
  89  * \324          - indicates 64-bit operand size requiring REX prefix.
  90  * \330          - a literal byte follows in the code stream, to be added
  91  *                 to the condition code value of the instruction.
  92  * \331          - instruction not valid with REP prefix.  Hint for
  93  *                 disassembler only; for SSE instructions.
  94  * \332          - REP prefix (0xF2 byte) used as opcode extension.
  95  * \333          - REP prefix (0xF3 byte) used as opcode extension.
  96  * \334          - LOCK prefix used instead of REX.R
  97  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  98  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  99  * \337          - force a REPNE prefix (0xF2) even if not specified.
 100  *                 \336-\337 are still listed as prefixes in the disassembler.
 101  * \340          - reserve <operand 0> bytes of uninitialized storage.
 102  *                 Operand 0 had better be a segmentless constant.
 103  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 104  *                 (POP is never used for CS) depending on operand 0
 105  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 106  *                 on operand 0
 107  * \360          - no SSE prefix (== \364\331)
 108  * \361          - 66 SSE prefix (== \366\331)
 109  * \362          - F2 SSE prefix (== \364\332)
 110  * \363          - F3 SSE prefix (== \364\333)
 111  * \364          - operand-size prefix (0x66) not permitted
 112  * \365          - address-size prefix (0x67) not permitted
 113  * \366          - operand-size prefix (0x66) used as opcode extension
 114  * \367          - address-size prefix (0x67) used as opcode extension
 115  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 116  *                 370 is used for Jcc, 371 is used for JMP.
 117  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 118  *                 used for conditional jump over longer jump
 119  */
 120
 121 #include "compiler.h"
 122
 123 #include <stdio.h>
 124 #include <string.h>
 125 #include <inttypes.h>
 126
 127 #include "nasm.h"
 128 #include "nasmlib.h"
 129 #include "assemble.h"
 130 #include "insns.h"
 131 #include "tables.h"
 132
 133 /* Initialized to zero by the C standard */
 134 static const uint8_t const_zero_buf[256];
 135
 136 typedef struct {
 137     int sib_present;                 /* is a SIB byte necessary? */
 138     int bytes;                       /* # of bytes of offset needed */
 139     int size;                        /* lazy - this is sib+bytes+1 */
 140     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 141 } ea;
 142
 143 static uint32_t cpu;            /* cpu level received from nasm.c */
 144 static efunc errfunc;
 145 static struct ofmt *outfmt;
 146 static ListGen *list;
 147
 148 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 149 static void gencode(int32_t segment, int64_t offset, int bits,
 150                     insn * ins, const struct itemplate *temp,
 151                     int64_t insn_end);
 152 static int matches(const struct itemplate *, insn *, int bits);
 153 static int32_t regflag(const operand *);
 154 static int32_t regval(const operand *);
 155 static int rexflags(int, int32_t, int);
 156 static int op_rexflags(const operand *, int);
 157 static ea *process_ea(operand *, ea *, int, int, int, int32_t);
 158 static void add_asp(insn *, int);
 159
 160 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 161 {
 162     return ins->prefixes[pos] == prefix;
 163 }
 164
 165 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 166 {
 167     if (ins->prefixes[pos])
 168         errfunc(ERR_NONFATAL, "invalid %s prefix",
 169                 prefix_name(ins->prefixes[pos]));
 170 }
 171
 172 static const char *size_name(int size)
 173 {
 174     switch (size) {
 175     case 1:
 176         return "byte";
 177     case 2:
 178         return "word";
 179     case 4:
 180         return "dword";
 181     case 8:
 182         return "qword";
 183     case 10:
 184         return "tword";
 185     case 16:
 186         return "oword";
 187     case 32:
 188         return "yword";
 189     default:
 190         return "???";
 191     }
 192 }
 193
 194 static void warn_overflow(int size, const struct operand *o)
 195 {
 196     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 197         int64_t lim = ((int64_t)1 << (size*8))-1;
 198         int64_t data = o->offset;
 199
 200         if (data < ~lim || data > lim)
 201             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 202                     "%s data exceeds bounds", size_name(size));
 203     }
 204 }
 205 /*
 206  * This routine wrappers the real output format's output routine,
 207  * in order to pass a copy of the data off to the listing file
 208  * generator at the same time.
 209  */
 210 static void out(int64_t offset, int32_t segto, const void *data,
 211                 enum out_type type, uint64_t size,
 212                 int32_t segment, int32_t wrt)
 213 {
 214     static int32_t lineno = 0;     /* static!!! */
 215     static char *lnfname = NULL;
 216     uint8_t p[8];
 217
 218     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 219         /*
 220          * This is a non-relocated address, and we're going to
 221          * convert it into RAWDATA format.
 222          */
 223         uint8_t *q = p;
 224
 225         if (size > 8) {
 226             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 227             return;
 228         }
 229
 230         WRITEADDR(q, *(int64_t *)data, size);
 231         data = p;
 232         type = OUT_RAWDATA;
 233     }
 234
 235     list->output(offset, data, type, size);
 236
 237     /*
 238      * this call to src_get determines when we call the
 239      * debug-format-specific "linenum" function
 240      * it updates lineno and lnfname to the current values
 241      * returning 0 if "same as last time", -2 if lnfname
 242      * changed, and the amount by which lineno changed,
 243      * if it did. thus, these variables must be static
 244      */
 245
 246     if (src_get(&lineno, &lnfname)) {
 247         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 248     }
 249
 250     outfmt->output(segto, data, type, size, segment, wrt);
 251 }
 252
 253 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 254                      insn * ins, const uint8_t *code)
 255 {
 256     int64_t isize;
 257     uint8_t c = code[0];
 258
 259     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 260         return false;
 261     if (!optimizing)
 262         return false;
 263     if (optimizing < 0 && c == 0371)
 264         return false;
 265
 266     isize = calcsize(segment, offset, bits, ins, code);
 267     if (ins->oprs[0].segment != segment)
 268         return false;
 269
 270     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 271     return (isize >= -128 && isize <= 127); /* is it byte size? */
 272 }
 273
 274 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 275               insn * instruction, struct ofmt *output, efunc error,
 276               ListGen * listgen)
 277 {
 278     const struct itemplate *temp;
 279     int j;
 280     int size_prob;
 281     int64_t insn_end;
 282     int32_t itimes;
 283     int64_t start = offset;
 284     int64_t wsize = 0;             /* size for DB etc. */
 285
 286     errfunc = error;            /* to pass to other functions */
 287     cpu = cp;
 288     outfmt = output;            /* likewise */
 289     list = listgen;             /* and again */
 290
 291     switch (instruction->opcode) {
 292     case -1:
 293         return 0;
 294     case I_DB:
 295         wsize = 1;
 296         break;
 297     case I_DW:
 298         wsize = 2;
 299         break;
 300     case I_DD:
 301         wsize = 4;
 302         break;
 303     case I_DQ:
 304         wsize = 8;
 305         break;
 306     case I_DT:
 307         wsize = 10;
 308         break;
 309     case I_DO:
 310         wsize = 16;
 311         break;
 312     case I_DY:
 313         wsize = 32;
 314         break;
 315     default:
 316         break;
 317     }
 318
 319     if (wsize) {
 320         extop *e;
 321         int32_t t = instruction->times;
 322         if (t < 0)
 323             errfunc(ERR_PANIC,
 324                     "instruction->times < 0 (%ld) in assemble()", t);
 325
 326         while (t--) {           /* repeat TIMES times */
 327             for (e = instruction->eops; e; e = e->next) {
 328                 if (e->type == EOT_DB_NUMBER) {
 329                     if (wsize == 1) {
 330                         if (e->segment != NO_SEG)
 331                             errfunc(ERR_NONFATAL,
 332                                     "one-byte relocation attempted");
 333                         else {
 334                             uint8_t out_byte = e->offset;
 335                             out(offset, segment, &out_byte,
 336                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 337                         }
 338                     } else if (wsize > 8) {
 339                         errfunc(ERR_NONFATAL,
 340                                 "integer supplied to a DT, DO or DY"
 341                                 " instruction");
 342                     } else
 343                         out(offset, segment, &e->offset,
 344                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 345                     offset += wsize;
 346                 } else if (e->type == EOT_DB_STRING ||
 347                            e->type == EOT_DB_STRING_FREE) {
 348                     int align;
 349
 350                     out(offset, segment, e->stringval,
 351                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 352                     align = e->stringlen % wsize;
 353
 354                     if (align) {
 355                         align = wsize - align;
 356                         out(offset, segment, const_zero_buf,
 357                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 358                     }
 359                     offset += e->stringlen + align;
 360                 }
 361             }
 362             if (t > 0 && t == instruction->times - 1) {
 363                 /*
 364                  * Dummy call to list->output to give the offset to the
 365                  * listing module.
 366                  */
 367                 list->output(offset, NULL, OUT_RAWDATA, 0);
 368                 list->uplevel(LIST_TIMES);
 369             }
 370         }
 371         if (instruction->times > 1)
 372             list->downlevel(LIST_TIMES);
 373         return offset - start;
 374     }
 375
 376     if (instruction->opcode == I_INCBIN) {
 377         const char *fname = instruction->eops->stringval;
 378         FILE *fp;
 379
 380         fp = fopen(fname, "rb");
 381         if (!fp) {
 382             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 383                   fname);
 384         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 385             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 386                   fname);
 387         } else {
 388             static char buf[4096];
 389             size_t t = instruction->times;
 390             size_t base = 0;
 391             size_t len;
 392
 393             len = ftell(fp);
 394             if (instruction->eops->next) {
 395                 base = instruction->eops->next->offset;
 396                 len -= base;
 397                 if (instruction->eops->next->next &&
 398                     len > (size_t)instruction->eops->next->next->offset)
 399                     len = (size_t)instruction->eops->next->next->offset;
 400             }
 401             /*
 402              * Dummy call to list->output to give the offset to the
 403              * listing module.
 404              */
 405             list->output(offset, NULL, OUT_RAWDATA, 0);
 406             list->uplevel(LIST_INCBIN);
 407             while (t--) {
 408                 size_t l;
 409
 410                 fseek(fp, base, SEEK_SET);
 411                 l = len;
 412                 while (l > 0) {
 413                     int32_t m =
 414                         fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
 415                               fp);
 416                     if (!m) {
 417                         /*
 418                          * This shouldn't happen unless the file
 419                          * actually changes while we are reading
 420                          * it.
 421                          */
 422                         error(ERR_NONFATAL,
 423                               "`incbin': unexpected EOF while"
 424                               " reading file `%s'", fname);
 425                         t = 0;  /* Try to exit cleanly */
 426                         break;
 427                     }
 428                     out(offset, segment, buf, OUT_RAWDATA, m,
 429                         NO_SEG, NO_SEG);
 430                     l -= m;
 431                 }
 432             }
 433             list->downlevel(LIST_INCBIN);
 434             if (instruction->times > 1) {
 435                 /*
 436                  * Dummy call to list->output to give the offset to the
 437                  * listing module.
 438                  */
 439                 list->output(offset, NULL, OUT_RAWDATA, 0);
 440                 list->uplevel(LIST_TIMES);
 441                 list->downlevel(LIST_TIMES);
 442             }
 443             fclose(fp);
 444             return instruction->times * len;
 445         }
 446         return 0;               /* if we're here, there's an error */
 447     }
 448
 449     /* Check to see if we need an address-size prefix */
 450     add_asp(instruction, bits);
 451
 452     size_prob = false;
 453
 454     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 455         int m = matches(temp, instruction, bits);
 456         if (m == 100 ||
 457             (m == 99 && jmp_match(segment, offset, bits,
 458                                   instruction, temp->code))) {
 459             /* Matches! */
 460             int64_t insn_size = calcsize(segment, offset, bits,
 461                                       instruction, temp->code);
 462             itimes = instruction->times;
 463             if (insn_size < 0)  /* shouldn't be, on pass two */
 464                 error(ERR_PANIC, "errors made it through from pass one");
 465             else
 466                 while (itimes--) {
 467                     for (j = 0; j < MAXPREFIX; j++) {
 468                         uint8_t c = 0;
 469                         switch (instruction->prefixes[j]) {
 470                         case P_LOCK:
 471                             c = 0xF0;
 472                             break;
 473                         case P_REPNE:
 474                         case P_REPNZ:
 475                             c = 0xF2;
 476                             break;
 477                         case P_REPE:
 478                         case P_REPZ:
 479                         case P_REP:
 480                             c = 0xF3;
 481                             break;
 482                         case R_CS:
 483                             if (bits == 64) {
 484                                 error(ERR_WARNING | ERR_PASS2,
 485                                       "cs segment base generated, but will be ignored in 64-bit mode");
 486                             }
 487                             c = 0x2E;
 488                             break;
 489                         case R_DS:
 490                             if (bits == 64) {
 491                                 error(ERR_WARNING | ERR_PASS2,
 492                                       "ds segment base generated, but will be ignored in 64-bit mode");
 493                             }
 494                             c = 0x3E;
 495                             break;
 496                         case R_ES:
 497                            if (bits == 64) {
 498                                 error(ERR_WARNING | ERR_PASS2,
 499                                       "es segment base generated, but will be ignored in 64-bit mode");
 500                            }
 501                             c = 0x26;
 502                             break;
 503                         case R_FS:
 504                             c = 0x64;
 505                             break;
 506                         case R_GS:
 507                             c = 0x65;
 508                             break;
 509                         case R_SS:
 510                             if (bits == 64) {
 511                                 error(ERR_WARNING | ERR_PASS2,
 512                                       "ss segment base generated, but will be ignored in 64-bit mode");
 513                             }
 514                             c = 0x36;
 515                             break;
 516                         case R_SEGR6:
 517                         case R_SEGR7:
 518                             error(ERR_NONFATAL,
 519                                   "segr6 and segr7 cannot be used as prefixes");
 520                             break;
 521                         case P_A16:
 522                             if (bits == 64) {
 523                                 error(ERR_NONFATAL,
 524                                       "16-bit addressing is not supported "
 525                                       "in 64-bit mode");
 526                             } else if (bits != 16)
 527                                 c = 0x67;
 528                             break;
 529                         case P_A32:
 530                             if (bits != 32)
 531                                 c = 0x67;
 532                             break;
 533                         case P_A64:
 534                             if (bits != 64) {
 535                                 error(ERR_NONFATAL,
 536                                       "64-bit addressing is only supported "
 537                                       "in 64-bit mode");
 538                             }
 539                             break;
 540                         case P_ASP:
 541                             c = 0x67;
 542                             break;
 543                         case P_O16:
 544                             if (bits != 16)
 545                                 c = 0x66;
 546                             break;
 547                         case P_O32:
 548                             if (bits == 16)
 549                                 c = 0x66;
 550                             break;
 551                         case P_O64:
 552                             /* REX.W */
 553                             break;
 554                         case P_OSP:
 555                             c = 0x66;
 556                             break;
 557                         case P_none:
 558                             break;
 559                         default:
 560                             error(ERR_PANIC, "invalid instruction prefix");
 561                         }
 562                         if (c != 0) {
 563                             out(offset, segment, &c, OUT_RAWDATA, 1,
 564                                 NO_SEG, NO_SEG);
 565                             offset++;
 566                         }
 567                     }
 568                     insn_end = offset + insn_size;
 569                     gencode(segment, offset, bits, instruction,
 570                             temp, insn_end);
 571                     offset += insn_size;
 572                     if (itimes > 0 && itimes == instruction->times - 1) {
 573                         /*
 574                          * Dummy call to list->output to give the offset to the
 575                          * listing module.
 576                          */
 577                         list->output(offset, NULL, OUT_RAWDATA, 0);
 578                         list->uplevel(LIST_TIMES);
 579                     }
 580                 }
 581             if (instruction->times > 1)
 582                 list->downlevel(LIST_TIMES);
 583             return offset - start;
 584         } else if (m > 0 && m > size_prob) {
 585             size_prob = m;
 586         }
 587     }
 588
 589     if (temp->opcode == -1) {   /* didn't match any instruction */
 590         switch (size_prob) {
 591         case 1:
 592             error(ERR_NONFATAL, "operation size not specified");
 593             break;
 594         case 2:
 595             error(ERR_NONFATAL, "mismatch in operand sizes");
 596             break;
 597         case 3:
 598             error(ERR_NONFATAL, "no instruction for this cpu level");
 599             break;
 600         case 4:
 601             error(ERR_NONFATAL, "instruction not supported in 64-bit mode");
 602             break;
 603         default:
 604             error(ERR_NONFATAL,
 605                   "invalid combination of opcode and operands");
 606             break;
 607         }
 608     }
 609     return 0;
 610 }
 611
 612 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 613                insn * instruction, efunc error)
 614 {
 615     const struct itemplate *temp;
 616
 617     errfunc = error;            /* to pass to other functions */
 618     cpu = cp;
 619
 620     if (instruction->opcode == -1)
 621         return 0;
 622
 623     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 624         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 625         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 626         instruction->opcode == I_DY) {
 627         extop *e;
 628         int32_t isize, osize, wsize = 0;   /* placate gcc */
 629
 630         isize = 0;
 631         switch (instruction->opcode) {
 632         case I_DB:
 633             wsize = 1;
 634             break;
 635         case I_DW:
 636             wsize = 2;
 637             break;
 638         case I_DD:
 639             wsize = 4;
 640             break;
 641         case I_DQ:
 642             wsize = 8;
 643             break;
 644         case I_DT:
 645             wsize = 10;
 646             break;
 647         case I_DO:
 648             wsize = 16;
 649             break;
 650         case I_DY:
 651             wsize = 32;
 652             break;
 653         default:
 654             break;
 655         }
 656
 657         for (e = instruction->eops; e; e = e->next) {
 658             int32_t align;
 659
 660             osize = 0;
 661             if (e->type == EOT_DB_NUMBER)
 662                 osize = 1;
 663             else if (e->type == EOT_DB_STRING ||
 664                      e->type == EOT_DB_STRING_FREE)
 665                 osize = e->stringlen;
 666
 667             align = (-osize) % wsize;
 668             if (align < 0)
 669                 align += wsize;
 670             isize += osize + align;
 671         }
 672         return isize * instruction->times;
 673     }
 674
 675     if (instruction->opcode == I_INCBIN) {
 676         const char *fname = instruction->eops->stringval;
 677         FILE *fp;
 678         size_t len;
 679
 680         fp = fopen(fname, "rb");
 681         if (!fp)
 682             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 683                   fname);
 684         else if (fseek(fp, 0L, SEEK_END) < 0)
 685             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 686                   fname);
 687         else {
 688             len = ftell(fp);
 689             fclose(fp);
 690             if (instruction->eops->next) {
 691                 len -= instruction->eops->next->offset;
 692                 if (instruction->eops->next->next &&
 693                     len > (size_t)instruction->eops->next->next->offset) {
 694                     len = (size_t)instruction->eops->next->next->offset;
 695                 }
 696             }
 697             return instruction->times * len;
 698         }
 699         return 0;               /* if we're here, there's an error */
 700     }
 701
 702     /* Check to see if we need an address-size prefix */
 703     add_asp(instruction, bits);
 704
 705     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 706         int m = matches(temp, instruction, bits);
 707         if (m == 100 ||
 708             (m == 99 && jmp_match(segment, offset, bits,
 709                                   instruction, temp->code))) {
 710             /* we've matched an instruction. */
 711             int64_t isize;
 712             const uint8_t *codes = temp->code;
 713             int j;
 714
 715             isize = calcsize(segment, offset, bits, instruction, codes);
 716             if (isize < 0)
 717                 return -1;
 718             for (j = 0; j < MAXPREFIX; j++) {
 719                 switch (instruction->prefixes[j]) {
 720                 case P_A16:
 721                     if (bits != 16)
 722                         isize++;
 723                     break;
 724                 case P_A32:
 725                     if (bits != 32)
 726                         isize++;
 727                     break;
 728                 case P_O16:
 729                     if (bits != 16)
 730                         isize++;
 731                     break;
 732                 case P_O32:
 733                     if (bits == 16)
 734                         isize++;
 735                     break;
 736                 case P_A64:
 737                 case P_O64:
 738                 case P_none:
 739                     break;
 740                 default:
 741                     isize++;
 742                     break;
 743                 }
 744             }
 745             return isize * instruction->times;
 746         }
 747     }
 748     return -1;                  /* didn't match any instruction */
 749 }
 750
 751 static bool possible_sbyte(operand *o)
 752 {
 753     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 754         !(o->opflags & OPFLAG_FORWARD) &&
 755         optimizing >= 0 && !(o->type & STRICT);
 756 }
 757
 758 /* check that opn[op]  is a signed byte of size 16 or 32 */
 759 static bool is_sbyte16(operand *o)
 760 {
 761     int16_t v;
 762
 763     if (!possible_sbyte(o))
 764         return false;
 765
 766     v = o->offset;
 767     return v >= -128 && v <= 127;
 768 }
 769
 770 static bool is_sbyte32(operand *o)
 771 {
 772     int32_t v;
 773
 774     if (!possible_sbyte(o))
 775         return false;
 776
 777     v = o->offset;
 778     return v >= -128 && v <= 127;
 779 }
 780
 781 /* Common construct */
 782 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 783
 784 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 785                         insn * ins, const uint8_t *codes)
 786 {
 787     int64_t length = 0;
 788     uint8_t c;
 789     int rex_mask = ~0;
 790     int op1, op2;
 791     struct operand *opx;
 792     uint8_t opex = 0;
 793
 794     ins->rex = 0;               /* Ensure REX is reset */
 795
 796     if (ins->prefixes[PPS_OSIZE] == P_O64)
 797         ins->rex |= REX_W;
 798
 799     (void)segment;              /* Don't warn that this parameter is unused */
 800     (void)offset;               /* Don't warn that this parameter is unused */
 801
 802     while (*codes) {
 803         c = *codes++;
 804         op1 = (c & 3) + ((opex & 1) << 2);
 805         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 806         opx = &ins->oprs[op1];
 807         opex = 0;               /* For the next iteration */
 808
 809         switch (c) {
 810         case 01:
 811         case 02:
 812         case 03:
 813         case 04:
 814             codes += c, length += c;
 815             break;
 816
 817         case 05:
 818         case 06:
 819         case 07:
 820             opex = c;
 821             break;
 822
 823         case4(010):
 824             ins->rex |=
 825                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 826             codes++, length++;
 827             break;
 828
 829         case4(014):
 830         case4(020):
 831         case4(024):
 832             length++;
 833             break;
 834
 835         case4(030):
 836             length += 2;
 837             break;
 838
 839         case4(034):
 840             if (opx->type & (BITS16 | BITS32 | BITS64))
 841                 length += (opx->type & BITS16) ? 2 : 4;
 842             else
 843                 length += (bits == 16) ? 2 : 4;
 844             break;
 845
 846         case4(040):
 847             length += 4;
 848             break;
 849
 850         case4(044):
 851             length += ins->addr_size >> 3;
 852             break;
 853
 854         case4(050):
 855             length++;
 856             break;
 857
 858         case4(054):
 859             length += 8; /* MOV reg64/imm */
 860             break;
 861
 862         case4(060):
 863             length += 2;
 864             break;
 865
 866         case4(064):
 867             if (opx->type & (BITS16 | BITS32 | BITS64))
 868                 length += (opx->type & BITS16) ? 2 : 4;
 869             else
 870                 length += (bits == 16) ? 2 : 4;
 871             break;
 872
 873         case4(070):
 874             length += 4;
 875             break;
 876
 877         case4(074):
 878             length += 2;
 879             break;
 880
 881         case4(0140):
 882             length += is_sbyte16(opx) ? 1 : 2;
 883             break;
 884
 885         case4(0144):
 886             codes++;
 887             length++;
 888             break;
 889
 890         case4(0150):
 891             length += is_sbyte32(opx) ? 1 : 4;
 892             break;
 893
 894         case4(0154):
 895             codes++;
 896             length++;
 897             break;
 898
 899         case4(0160):
 900             length++;
 901             ins->rex |= REX_D;
 902             ins->drexdst = regval(opx);
 903             break;
 904
 905         case4(0164):
 906             length++;
 907             ins->rex |= REX_D|REX_OC;
 908             ins->drexdst = regval(opx);
 909             break;
 910
 911         case 0171:
 912             break;
 913
 914         case 0172:
 915         case 0173:
 916         case 0174:
 917             codes++;
 918             length++;
 919             break;
 920
 921         case4(0250):
 922             length += is_sbyte32(opx) ? 1 : 4;
 923             break;
 924
 925         case4(0254):
 926             length += 4;
 927             break;
 928
 929         case4(0260):
 930             ins->rex |= REX_V;
 931             ins->drexdst = regval(opx);
 932             ins->vex_m = *codes++;
 933             ins->vex_wlp = *codes++;
 934             break;
 935
 936         case 0270:
 937             ins->rex |= REX_V;
 938             ins->drexdst = 0;
 939             ins->vex_m = *codes++;
 940             ins->vex_wlp = *codes++;
 941             break;
 942
 943         case4(0274):
 944             length++;
 945             break;
 946
 947         case4(0300):
 948             break;
 949
 950         case 0310:
 951             if (bits == 64)
 952                 return -1;
 953             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 954             break;
 955
 956         case 0311:
 957             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 958             break;
 959
 960         case 0312:
 961             break;
 962
 963         case 0313:
 964             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 965                 has_prefix(ins, PPS_ASIZE, P_A32))
 966                 return -1;
 967             break;
 968
 969         case4(0314):
 970             break;
 971
 972         case 0320:
 973             length += (bits != 16);
 974             break;
 975
 976         case 0321:
 977             length += (bits == 16);
 978             break;
 979
 980         case 0322:
 981             break;
 982
 983         case 0323:
 984             rex_mask &= ~REX_W;
 985             break;
 986
 987         case 0324:
 988             ins->rex |= REX_W;
 989             break;
 990
 991         case 0330:
 992             codes++, length++;
 993             break;
 994
 995         case 0331:
 996             break;
 997
 998         case 0332:
 999         case 0333:
1000             length++;
1001             break;
1002
1003         case 0334:
1004             ins->rex |= REX_L;
1005             break;
1006
1007         case 0335:
1008             break;
1009
1010         case 0336:
1011             if (!ins->prefixes[PPS_LREP])
1012                 ins->prefixes[PPS_LREP] = P_REP;
1013             break;
1014
1015         case 0337:
1016             if (!ins->prefixes[PPS_LREP])
1017                 ins->prefixes[PPS_LREP] = P_REPNE;
1018             break;
1019
1020         case 0340:
1021             if (ins->oprs[0].segment != NO_SEG)
1022                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1023                         " quantity of BSS space");
1024             else
1025                 length += ins->oprs[0].offset;
1026             break;
1027
1028         case4(0344):
1029             length++;
1030             break;
1031
1032         case 0360:
1033             break;
1034
1035         case 0361:
1036         case 0362:
1037         case 0363:
1038             length++;
1039             break;
1040
1041         case 0364:
1042         case 0365:
1043             break;
1044
1045         case 0366:
1046         case 0367:
1047             length++;
1048             break;
1049
1050         case 0370:
1051         case 0371:
1052         case 0372:
1053             break;
1054
1055         case 0373:
1056             length++;
1057             break;
1058
1059         case4(0100):
1060         case4(0110):
1061         case4(0120):
1062         case4(0130):
1063         case4(0200):
1064         case4(0204):
1065         case4(0210):
1066         case4(0214):
1067         case4(0220):
1068         case4(0224):
1069         case4(0230):
1070         case4(0234):
1071             {
1072                 ea ea_data;
1073                 int rfield;
1074                 int32_t rflags;
1075                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1076
1077                 if (c <= 0177) {
1078                     /* pick rfield from operand b */
1079                     rflags = regflag(&ins->oprs[op1]);
1080                     rfield = nasm_regvals[ins->oprs[op1].basereg];
1081                 } else {
1082                     rflags = 0;
1083                     rfield = c & 7;
1084                 }
1085                 if (!process_ea(&ins->oprs[op2], &ea_data, bits,
1086                                 ins->addr_size, rfield, rflags)) {
1087                     errfunc(ERR_NONFATAL, "invalid effective address");
1088                     return -1;
1089                 } else {
1090                     ins->rex |= ea_data.rex;
1091                     length += ea_data.size;
1092                 }
1093             }
1094             break;
1095
1096         default:
1097             errfunc(ERR_PANIC, "internal instruction table corrupt"
1098                     ": instruction code 0x%02X given", c);
1099             break;
1100         }
1101     }
1102
1103     ins->rex &= rex_mask;
1104
1105     if (ins->rex & REX_V) {
1106         int bad32 = REX_R|REX_W|REX_X|REX_B;
1107
1108         if (ins->rex & REX_H) {
1109             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1110             return -1;
1111         }
1112         switch (ins->vex_wlp & 030) {
1113         case 000:
1114         case 020:
1115             ins->rex &= ~REX_W;
1116             break;
1117         case 010:
1118             ins->rex |= REX_W;
1119             bad32 &= ~REX_W;
1120             break;
1121         case 030:
1122             /* Follow REX_W */
1123             break;
1124         }
1125
1126         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1127             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1128             return -1;
1129         }
1130         if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1131             length += 3;
1132         else
1133             length += 2;
1134     } else if (ins->rex & REX_D) {
1135         if (ins->rex & REX_H) {
1136             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1137             return -1;
1138         }
1139         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1140                            ins->drexdst > 7)) {
1141             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1142             return -1;
1143         }
1144         length++;
1145     } else if (ins->rex & REX_REAL) {
1146         if (ins->rex & REX_H) {
1147             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1148             return -1;
1149         } else if (bits == 64) {
1150             length++;
1151         } else if ((ins->rex & REX_L) &&
1152                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1153                    cpu >= IF_X86_64) {
1154             /* LOCK-as-REX.R */
1155             assert_no_prefix(ins, PPS_LREP);
1156             length++;
1157         } else {
1158             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1159             return -1;
1160         }
1161     }
1162
1163     return length;
1164 }
1165
/*
 * Emit a pending REX prefix byte, if one is needed.
 *
 * Expects `ins`, `bits`, `offset` and `segment` to be in scope at the use
 * site.  Acts only in 64-bit mode, when ins->rex has REX_REAL bits set
 * and neither REX_D nor REX_V is set.  It then writes one byte made of
 * the REX_REAL bits plus REX_P, clears ins->rex so the prefix is not
 * emitted twice, and advances offset by one.
 *
 * Wrapped in do { } while (0) so that "EMIT_REX();" is exactly one
 * statement and can be used safely as the body of an if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&     \
            (bits == 64)) {                                             \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1173
1174 static void gencode(int32_t segment, int64_t offset, int bits,
1175                     insn * ins, const struct itemplate *temp,
1176                     int64_t insn_end)
1177 {
1178     static char condval[] = {   /* conditional opcodes */
1179         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1180         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1181         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1182     };
1183     uint8_t c;
1184     uint8_t bytes[4];
1185     int64_t size;
1186     int64_t data;
1187     int op1, op2;
1188     struct operand *opx;
1189     const uint8_t *codes = temp->code;
1190     uint8_t opex = 0;
1191
1192     while (*codes) {
1193         c = *codes++;
1194         op1 = (c & 3) + ((opex & 1) << 2);
1195         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1196         opx = &ins->oprs[op1];
1197         opex = 0;               /* For the next iteration */
1198
1199         switch (c) {
1200         case 01:
1201         case 02:
1202         case 03:
1203         case 04:
1204             EMIT_REX();
1205             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1206             codes += c;
1207             offset += c;
1208             break;
1209
1210         case 05:
1211         case 06:
1212         case 07:
1213             opex = c;
1214             break;
1215
1216         case4(010):
1217             EMIT_REX();
1218             bytes[0] = *codes++ + (regval(opx) & 7);
1219             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1220             offset += 1;
1221             break;
1222
1223         case4(014):
1224             /* The test for BITS8 and SBYTE here is intended to avoid
1225                warning on optimizer actions due to SBYTE, while still
1226                warn on explicit BYTE directives.  Also warn, obviously,
1227                if the optimizer isn't enabled. */
1228             if (((opx->type & BITS8) ||
1229                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1230                 (opx->offset < -128 || opx->offset > 127)) {
1231                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1232                         "signed byte value exceeds bounds");
1233             }
1234             if (opx->segment != NO_SEG) {
1235                 data = opx->offset;
1236                 out(offset, segment, &data, OUT_ADDRESS, 1,
1237                     opx->segment, opx->wrt);
1238             } else {
1239                 bytes[0] = opx->offset;
1240                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1241                     NO_SEG);
1242             }
1243             offset += 1;
1244             break;
1245
1246         case4(020):
1247             if (opx->offset < -256 || opx->offset > 255) {
1248                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1249                         "byte value exceeds bounds");
1250             }
1251             if (opx->segment != NO_SEG) {
1252                 data = opx->offset;
1253                 out(offset, segment, &data, OUT_ADDRESS, 1,
1254                     opx->segment, opx->wrt);
1255             } else {
1256                 bytes[0] = opx->offset;
1257                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1258                     NO_SEG);
1259             }
1260             offset += 1;
1261             break;
1262
1263         case4(024):
1264             if (opx->offset < 0 || opx->offset > 255)
1265                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1266                         "unsigned byte value exceeds bounds");
1267             if (opx->segment != NO_SEG) {
1268                 data = opx->offset;
1269                 out(offset, segment, &data, OUT_ADDRESS, 1,
1270                     opx->segment, opx->wrt);
1271             } else {
1272                 bytes[0] = opx->offset;
1273                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1274                     NO_SEG);
1275             }
1276             offset += 1;
1277             break;
1278
1279         case4(030):
1280             warn_overflow(2, opx);
1281             data = opx->offset;
1282             out(offset, segment, &data, OUT_ADDRESS, 2,
1283                 opx->segment, opx->wrt);
1284             offset += 2;
1285             break;
1286
1287         case4(034):
1288             if (opx->type & (BITS16 | BITS32))
1289                 size = (opx->type & BITS16) ? 2 : 4;
1290             else
1291                 size = (bits == 16) ? 2 : 4;
1292             warn_overflow(size, opx);
1293             data = opx->offset;
1294             out(offset, segment, &data, OUT_ADDRESS, size,
1295                 opx->segment, opx->wrt);
1296             offset += size;
1297             break;
1298
1299         case4(040):
1300             warn_overflow(4, opx);
1301             data = opx->offset;
1302             out(offset, segment, &data, OUT_ADDRESS, 4,
1303                 opx->segment, opx->wrt);
1304             offset += 4;
1305             break;
1306
1307         case4(044):
1308             data = opx->offset;
1309             size = ins->addr_size >> 3;
1310             warn_overflow(size, opx);
1311             out(offset, segment, &data, OUT_ADDRESS, size,
1312                 opx->segment, opx->wrt);
1313             offset += size;
1314             break;
1315
1316         case4(050):
1317             if (opx->segment != segment)
1318                 errfunc(ERR_NONFATAL,
1319                         "short relative jump outside segment");
1320             data = opx->offset - insn_end;
1321             if (data > 127 || data < -128)
1322                 errfunc(ERR_NONFATAL, "short jump is out of range");
1323             bytes[0] = data;
1324             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1325             offset += 1;
1326             break;
1327
1328         case4(054):
1329             data = (int64_t)opx->offset;
1330             out(offset, segment, &data, OUT_ADDRESS, 8,
1331                 opx->segment, opx->wrt);
1332             offset += 8;
1333             break;
1334
1335         case4(060):
1336             if (opx->segment != segment) {
1337                 data = opx->offset;
1338                 out(offset, segment, &data,
1339                     OUT_REL2ADR, insn_end - offset,
1340                     opx->segment, opx->wrt);
1341             } else {
1342                 data = opx->offset - insn_end;
1343                 out(offset, segment, &data,
1344                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1345             }
1346             offset += 2;
1347             break;
1348
1349         case4(064):
1350             if (opx->type & (BITS16 | BITS32 | BITS64))
1351                 size = (opx->type & BITS16) ? 2 : 4;
1352             else
1353                 size = (bits == 16) ? 2 : 4;
1354             if (opx->segment != segment) {
1355                 data = opx->offset;
1356                 out(offset, segment, &data,
1357                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1358                     insn_end - offset, opx->segment, opx->wrt);
1359             } else {
1360                 data = opx->offset - insn_end;
1361                 out(offset, segment, &data,
1362                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1363             }
1364             offset += size;
1365             break;
1366
1367         case4(070):
1368             if (opx->segment != segment) {
1369                 data = opx->offset;
1370                 out(offset, segment, &data,
1371                     OUT_REL4ADR, insn_end - offset,
1372                     opx->segment, opx->wrt);
1373             } else {
1374                 data = opx->offset - insn_end;
1375                 out(offset, segment, &data,
1376                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1377             }
1378             offset += 4;
1379             break;
1380
1381         case4(074):
1382             if (opx->segment == NO_SEG)
1383                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1384                         " relocatable");
1385             data = 0;
1386             out(offset, segment, &data, OUT_ADDRESS, 2,
1387                 outfmt->segbase(1 + opx->segment),
1388                 opx->wrt);
1389             offset += 2;
1390             break;
1391
1392         case4(0140):
1393             data = opx->offset;
1394             warn_overflow(2, opx);
1395             if (is_sbyte16(opx)) {
1396                 bytes[0] = data;
1397                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1398                     NO_SEG);
1399                 offset++;
1400             } else {
1401                 out(offset, segment, &data, OUT_ADDRESS, 2,
1402                     opx->segment, opx->wrt);
1403                 offset += 2;
1404             }
1405             break;
1406
1407         case4(0144):
1408             EMIT_REX();
1409             bytes[0] = *codes++;
1410             if (is_sbyte16(opx))
1411                 bytes[0] |= 2;  /* s-bit */
1412             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1413             offset++;
1414             break;
1415
1416         case4(0150):
1417             data = opx->offset;
1418             warn_overflow(4, opx);
1419             if (is_sbyte32(opx)) {
1420                 bytes[0] = data;
1421                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1422                     NO_SEG);
1423                 offset++;
1424             } else {
1425                 out(offset, segment, &data, OUT_ADDRESS, 4,
1426                     opx->segment, opx->wrt);
1427                 offset += 4;
1428             }
1429             break;
1430
1431         case4(0154):
1432             EMIT_REX();
1433             bytes[0] = *codes++;
1434             if (is_sbyte32(opx))
1435                 bytes[0] |= 2;  /* s-bit */
1436             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1437             offset++;
1438             break;
1439
1440         case4(0160):
1441         case4(0164):
1442             break;
1443
1444         case 0171:
1445             bytes[0] =
1446                 (ins->drexdst << 4) |
1447                 (ins->rex & REX_OC ? 0x08 : 0) |
1448                 (ins->rex & (REX_R|REX_X|REX_B));
1449             ins->rex = 0;
1450             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1451             offset++;
1452             break;
1453
1454         case 0172:
1455             c = *codes++;
1456             opx = &ins->oprs[c >> 3];
1457             bytes[0] = nasm_regvals[opx->basereg] << 4;
1458             opx = &ins->oprs[c & 7];
1459             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1460                 errfunc(ERR_NONFATAL,
1461                         "non-absolute expression not permitted as argument %d",
1462                         c & 7);
1463             } else {
1464                 if (opx->offset & ~15) {
1465                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1466                             "four-bit argument exceeds bounds");
1467                 }
1468                 bytes[0] |= opx->offset & 15;
1469             }
1470             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1471             offset++;
1472             break;
1473
1474         case 0173:
1475             c = *codes++;
1476             opx = &ins->oprs[c >> 4];
1477             bytes[0] = nasm_regvals[opx->basereg] << 4;
1478             bytes[0] |= c & 15;
1479             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1480             offset++;
1481             break;
1482
1483         case 0174:
1484             c = *codes++;
1485             opx = &ins->oprs[c];
1486             bytes[0] = nasm_regvals[opx->basereg] << 4;
1487             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1488             offset++;
1489             break;
1490
1491         case4(0250):
1492             data = opx->offset;
1493             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1494                 (int32_t)data != (int64_t)data) {
1495                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1496                         "signed dword immediate exceeds bounds");
1497             }
1498             if (is_sbyte32(opx)) {
1499                 bytes[0] = data;
1500                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1501                     NO_SEG);
1502                 offset++;
1503             } else {
1504                 out(offset, segment, &data, OUT_ADDRESS, 4,
1505                     opx->segment, opx->wrt);
1506                 offset += 4;
1507             }
1508             break;
1509
1510         case4(0254):
1511             data = opx->offset;
1512             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1513                 (int32_t)data != (int64_t)data) {
1514                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1515                         "signed dword immediate exceeds bounds");
1516             }
1517             out(offset, segment, &data, OUT_ADDRESS, 4,
1518                 opx->segment, opx->wrt);
1519             offset += 4;
1520             break;
1521
1522         case4(0260):
1523         case 0270:
1524             codes += 2;
1525             if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1526                 bytes[0] = 0xc4;
1527                 bytes[1] = ins->vex_m | ((~ins->rex & 7) << 5);
1528                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1529                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1530                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1531                 offset += 3;
1532             } else {
1533                 bytes[0] = 0xc5;
1534                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1535                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1536                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1537                 offset += 2;
1538             }
1539             break;
1540
1541         case4(0274):
1542         {
1543             uint64_t uv, um;
1544             int s;
1545
1546             if (ins->rex & REX_W)
1547                 s = 64;
1548             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1549                 s = 16;
1550             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1551                 s = 32;
1552             else
1553                 s = bits;
1554
1555             um = (uint64_t)2 << (s-1);
1556             uv = opx->offset;
1557
1558             if (uv > 127 && uv < (uint64_t)-128 &&
1559                 (uv < um-128 || uv > um-1)) {
1560                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1561                         "signed byte value exceeds bounds");
1562             }
1563             if (opx->segment != NO_SEG) {
1564                 data = uv;
1565                 out(offset, segment, &data, OUT_ADDRESS, 1,
1566                     opx->segment, opx->wrt);
1567             } else {
1568                 bytes[0] = uv;
1569                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1570                     NO_SEG);
1571             }
1572             offset += 1;
1573             break;
1574         }
1575
1576         case4(0300):
1577             break;
1578
1579         case 0310:
1580             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1581                 *bytes = 0x67;
1582                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1583                 offset += 1;
1584             } else
1585                 offset += 0;
1586             break;
1587
1588         case 0311:
1589             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1590                 *bytes = 0x67;
1591                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1592                 offset += 1;
1593             } else
1594                 offset += 0;
1595             break;
1596
1597         case 0312:
1598             break;
1599
1600         case 0313:
1601             ins->rex = 0;
1602             break;
1603
1604         case4(0314):
1605             break;
1606
1607         case 0320:
1608             if (bits != 16) {
1609                 *bytes = 0x66;
1610                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1611                 offset += 1;
1612             } else
1613                 offset += 0;
1614             break;
1615
1616         case 0321:
1617             if (bits == 16) {
1618                 *bytes = 0x66;
1619                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1620                 offset += 1;
1621             } else
1622                 offset += 0;
1623             break;
1624
1625         case 0322:
1626         case 0323:
1627             break;
1628
1629         case 0324:
1630             ins->rex |= REX_W;
1631             break;
1632
1633         case 0330:
1634             *bytes = *codes++ ^ condval[ins->condition];
1635             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1636             offset += 1;
1637             break;
1638
1639         case 0331:
1640             break;
1641
1642         case 0332:
1643         case 0333:
1644             *bytes = c - 0332 + 0xF2;
1645             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1646             offset += 1;
1647             break;
1648
1649         case 0334:
1650             if (ins->rex & REX_R) {
1651                 *bytes = 0xF0;
1652                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1653                 offset += 1;
1654             }
1655             ins->rex &= ~(REX_L|REX_R);
1656             break;
1657
1658         case 0335:
1659             break;
1660
1661         case 0336:
1662         case 0337:
1663             break;
1664
1665         case 0340:
1666             if (ins->oprs[0].segment != NO_SEG)
1667                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1668             else {
1669                 int64_t size = ins->oprs[0].offset;
1670                 if (size > 0)
1671                     out(offset, segment, NULL,
1672                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1673                 offset += size;
1674             }
1675             break;
1676
1677         case 0344:
1678         case 0345:
1679             bytes[0] = c & 1;
1680             switch (ins->oprs[0].basereg) {
1681             case R_CS:
1682                 bytes[0] += 0x0E;
1683                 break;
1684             case R_DS:
1685                 bytes[0] += 0x1E;
1686                 break;
1687             case R_ES:
1688                 bytes[0] += 0x06;
1689                 break;
1690             case R_SS:
1691                 bytes[0] += 0x16;
1692                 break;
1693             default:
1694                 errfunc(ERR_PANIC,
1695                         "bizarre 8086 segment register received");
1696             }
1697             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1698             offset++;
1699             break;
1700
1701         case 0346:
1702         case 0347:
1703             bytes[0] = c & 1;
1704             switch (ins->oprs[0].basereg) {
1705             case R_FS:
1706                 bytes[0] += 0xA0;
1707                 break;
1708             case R_GS:
1709                 bytes[0] += 0xA8;
1710                 break;
1711             default:
1712                 errfunc(ERR_PANIC,
1713                         "bizarre 386 segment register received");
1714             }
1715             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1716             offset++;
1717             break;
1718
1719         case 0360:
1720             break;
1721
1722         case 0361:
1723             bytes[0] = 0x66;
1724             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1725             offset += 1;
1726             break;
1727
1728         case 0362:
1729         case 0363:
1730             bytes[0] = c - 0362 + 0xf2;
1731             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1732             offset += 1;
1733             break;
1734
1735         case 0364:
1736         case 0365:
1737             break;
1738
1739         case 0366:
1740         case 0367:
1741             *bytes = c - 0366 + 0x66;
1742             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1743             offset += 1;
1744             break;
1745
1746         case 0370:
1747         case 0371:
1748         case 0372:
1749             break;
1750
1751         case 0373:
1752             *bytes = bits == 16 ? 3 : 5;
1753             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1754             offset += 1;
1755             break;
1756
1757         case4(0100):
1758         case4(0110):
1759         case4(0120):
1760         case4(0130):
1761         case4(0200):
1762         case4(0204):
1763         case4(0210):
1764         case4(0214):
1765         case4(0220):
1766         case4(0224):
1767         case4(0230):
1768         case4(0234):
1769             {
1770                 ea ea_data;
1771                 int rfield;
1772                 int32_t rflags;
1773                 uint8_t *p;
1774                 int32_t s;
1775                 enum out_type type;
1776                 struct operand *opy = &ins->oprs[op2];
1777
1778                 if (c <= 0177) {
1779                     /* pick rfield from operand b (opx) */
1780                     rflags = regflag(opx);
1781                     rfield = nasm_regvals[opx->basereg];
1782                 } else {
1783                     /* rfield is constant */
1784                     rflags = 0;
1785                     rfield = c & 7;
1786                 }
1787
1788                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1789                                 rfield, rflags)) {
1790                     errfunc(ERR_NONFATAL, "invalid effective address");
1791                 }
1792
1793
1794                 p = bytes;
1795                 *p++ = ea_data.modrm;
1796                 if (ea_data.sib_present)
1797                     *p++ = ea_data.sib;
1798
1799                 /* DREX suffixes come between the SIB and the displacement */
1800                 if (ins->rex & REX_D) {
1801                     *p++ = (ins->drexdst << 4) |
1802                            (ins->rex & REX_OC ? 0x08 : 0) |
1803                            (ins->rex & (REX_R|REX_X|REX_B));
1804                     ins->rex = 0;
1805                 }
1806
1807                 s = p - bytes;
1808                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1809
1810                 /*
1811                  * Make sure the address gets the right offset in case
1812                  * the line breaks in the .lst file (BR 1197827)
1813                  */
1814                 offset += s;
1815                 s = 0;
1816
1817                 switch (ea_data.bytes) {
1818                 case 0:
1819                     break;
1820                 case 1:
1821                     if (opy->segment != NO_SEG) {
1822                         data = opy->offset;
1823                         out(offset, segment, &data, OUT_ADDRESS, 1,
1824                             opy->segment, opy->wrt);
1825                     } else {
1826                         *bytes = opy->offset;
1827                         out(offset, segment, bytes, OUT_RAWDATA, 1,
1828                             NO_SEG, NO_SEG);
1829                     }
1830                     s++;
1831                     break;
1832                 case 8:
1833                 case 2:
1834                 case 4:
1835                     data = opy->offset;
1836                     warn_overflow(ea_data.bytes, opy);
1837                     s += ea_data.bytes;
1838                     if (ea_data.rip) {
1839                         if (opy->segment == segment) {
1840                             data -= insn_end;
1841                             out(offset, segment, &data, OUT_ADDRESS,
1842                                 ea_data.bytes, NO_SEG, NO_SEG);
1843                         } else {
1844                             out(offset, segment, &data, OUT_REL4ADR,
1845                                 insn_end - offset, opy->segment, opy->wrt);
1846                         }
1847                     } else {
1848                         type = OUT_ADDRESS;
1849                         out(offset, segment, &data, OUT_ADDRESS,
1850                             ea_data.bytes, opy->segment, opy->wrt);
1851                     }
1852                     break;
1853                 }
1854                 offset += s;
1855             }
1856             break;
1857
1858         default:
1859             errfunc(ERR_PANIC, "internal instruction table corrupt"
1860                     ": instruction code 0x%02X given", c);
1861             break;
1862         }
1863     }
1864 }
1865
1866 static int32_t regflag(const operand * o)
1867 {
1868     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1869         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1870     }
1871     return nasm_reg_flags[o->basereg];
1872 }
1873
1874 static int32_t regval(const operand * o)
1875 {
1876     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1877         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1878     }
1879     return nasm_regvals[o->basereg];
1880 }
1881
1882 static int op_rexflags(const operand * o, int mask)
1883 {
1884     int32_t flags;
1885     int val;
1886
1887     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1888         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1889     }
1890
1891     flags = nasm_reg_flags[o->basereg];
1892     val = nasm_regvals[o->basereg];
1893
1894     return rexflags(val, flags, mask);
1895 }
1896
1897 static int rexflags(int val, int32_t flags, int mask)
1898 {
1899     int rex = 0;
1900
1901     if (val >= 8)
1902         rex |= REX_B|REX_X|REX_R;
1903     if (flags & BITS64)
1904         rex |= REX_W;
1905     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1906         rex |= REX_H;
1907     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1908         rex |= REX_P;
1909
1910     return rex & mask;
1911 }
1912
1913 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1914 {
1915     int i, size[MAX_OPERANDS], asize, oprs, ret;
1916
1917     ret = 100;
1918
1919     /*
1920      * Check the opcode
1921      */
1922     if (itemp->opcode != instruction->opcode)
1923         return 0;
1924
1925     /*
1926      * Count the operands
1927      */
1928     if (itemp->operands != instruction->operands)
1929         return 0;
1930
1931     /*
1932      * Check that no spurious colons or TOs are present
1933      */
1934     for (i = 0; i < itemp->operands; i++)
1935         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1936             return 0;
1937
1938     /*
1939      * Process size flags
1940      */
1941     if (itemp->flags & IF_ARMASK) {
1942         memset(size, 0, sizeof size);
1943
1944         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
1945
1946         switch (itemp->flags & IF_SMASK) {
1947         case IF_SB:
1948             size[i] = BITS8;
1949             break;
1950         case IF_SW:
1951             size[i] = BITS16;
1952             break;
1953         case IF_SD:
1954             size[i] = BITS32;
1955             break;
1956         case IF_SQ:
1957             size[i] = BITS64;
1958             break;
1959         case IF_SO:
1960             size[i] = BITS128;
1961             break;
1962         case IF_SY:
1963             size[i] = BITS256;
1964             break;
1965         case IF_SZ:
1966             switch (bits) {
1967             case 16:
1968                 size[i] = BITS16;
1969                 break;
1970             case 32:
1971                 size[i] = BITS32;
1972                 break;
1973             case 64:
1974                 size[i] = BITS64;
1975                 break;
1976             }
1977             break;
1978         default:
1979             break;
1980         }
1981     } else {
1982         asize = 0;
1983         switch (itemp->flags & IF_SMASK) {
1984         case IF_SB:
1985             asize = BITS8;
1986             break;
1987         case IF_SW:
1988             asize = BITS16;
1989             break;
1990         case IF_SD:
1991             asize = BITS32;
1992             break;
1993         case IF_SQ:
1994             asize = BITS64;
1995             break;
1996         case IF_SO:
1997             asize = BITS128;
1998             break;
1999         case IF_SY:
2000             asize = BITS256;
2001             break;
2002         case IF_SZ:
2003             switch (bits) {
2004             case 16:
2005                 asize = BITS16;
2006                 break;
2007             case 32:
2008                 asize = BITS32;
2009                 break;
2010             case 64:
2011                 asize = BITS64;
2012                 break;
2013             }
2014             break;
2015         default:
2016             break;
2017         }
2018         for (i = 0; i < MAX_OPERANDS; i++)
2019             size[i] = asize;
2020     }
2021
2022     /*
2023      * Check that the operand flags all match up
2024      */
2025     for (i = 0; i < itemp->operands; i++) {
2026         int32_t type = instruction->oprs[i].type;
2027         if (!(type & SIZE_MASK))
2028             type |= size[i];
2029
2030         if (itemp->opd[i] & SAME_AS) {
2031             int j = itemp->opd[i] & ~SAME_AS;
2032             if (type != instruction->oprs[j].type ||
2033                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2034                 return 0;
2035         } else if (itemp->opd[i] & ~type ||
2036             ((itemp->opd[i] & SIZE_MASK) &&
2037              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2038             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2039                 (type & SIZE_MASK))
2040                 return 0;
2041             else
2042                 return 1;
2043         }
2044     }
2045
2046     /*
2047      * Check operand sizes
2048      */
2049     if (itemp->flags & (IF_SM | IF_SM2)) {
2050         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2051         asize = 0;
2052         for (i = 0; i < oprs; i++) {
2053             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2054                 int j;
2055                 for (j = 0; j < oprs; j++)
2056                     size[j] = asize;
2057                 break;
2058             }
2059         }
2060     } else {
2061         oprs = itemp->operands;
2062     }
2063
2064     for (i = 0; i < itemp->operands; i++) {
2065         if (!(itemp->opd[i] & SIZE_MASK) &&
2066             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2067             return 2;
2068     }
2069
2070     /*
2071      * Check template is okay at the set cpu level
2072      */
2073     if (((itemp->flags & IF_PLEVEL) > cpu))
2074         return 3;
2075
2076     /*
2077      * Check if instruction is available in long mode
2078      */
2079     if ((itemp->flags & IF_NOLONG) && (bits == 64))
2080         return 4;
2081
2082     /*
2083      * Check if special handling needed for Jumps
2084      */
2085     if ((uint8_t)(itemp->code[0]) >= 0370)
2086         return 99;
2087
2088     return ret;
2089 }
2090
2091 static ea *process_ea(operand * input, ea * output, int bits,
2092                       int addrbits, int rfield, int32_t rflags)
2093 {
2094     bool forw_ref = !!(input->opflags & OPFLAG_FORWARD);
2095
2096     output->rip = false;
2097
2098     /* REX flags for the rfield operand */
2099     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2100
2101     if (!(REGISTER & ~input->type)) {   /* register direct */
2102         int i;
2103         int32_t f;
2104
2105         if (input->basereg < EXPR_REG_START /* Verify as Register */
2106             || input->basereg >= REG_ENUM_LIMIT)
2107             return NULL;
2108         f = regflag(input);
2109         i = nasm_regvals[input->basereg];
2110
2111         if (REG_EA & ~f)
2112             return NULL;        /* Invalid EA register */
2113
2114         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2115
2116         output->sib_present = false;             /* no SIB necessary */
2117         output->bytes = 0;  /* no offset necessary either */
2118         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2119     } else {                    /* it's a memory reference */
2120         if (input->basereg == -1
2121             && (input->indexreg == -1 || input->scale == 0)) {
2122             /* it's a pure offset */
2123             if (bits == 64 && (~input->type & IP_REL)) {
2124               int scale, index, base;
2125               output->sib_present = true;
2126               scale = 0;
2127               index = 4;
2128               base = 5;
2129               output->sib = (scale << 6) | (index << 3) | base;
2130               output->bytes = 4;
2131               output->modrm = 4 | ((rfield & 7) << 3);
2132               output->rip = false;
2133             } else {
2134               output->sib_present = false;
2135               output->bytes = (addrbits != 16 ? 4 : 2);
2136               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2137               output->rip = bits == 64;
2138             }
2139         } else {                /* it's an indirection */
2140             int i = input->indexreg, b = input->basereg, s = input->scale;
2141             int32_t o = input->offset, seg = input->segment;
2142             int hb = input->hintbase, ht = input->hinttype;
2143             int t;
2144             int it, bt;
2145             int32_t ix, bx;     /* register flags */
2146
2147             if (s == 0)
2148                 i = -1;         /* make this easy, at least */
2149
2150             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2151                 it = nasm_regvals[i];
2152                 ix = nasm_reg_flags[i];
2153             } else {
2154                 it = -1;
2155                 ix = 0;
2156             }
2157
2158             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2159                 bt = nasm_regvals[b];
2160                 bx = nasm_reg_flags[b];
2161             } else {
2162                 bt = -1;
2163                 bx = 0;
2164             }
2165
2166             /* check for a 32/64-bit memory reference... */
2167             if ((ix|bx) & (BITS32|BITS64)) {
2168                 /* it must be a 32/64-bit memory reference. Firstly we have
2169                  * to check that all registers involved are type E/Rxx. */
2170                 int32_t sok = BITS32|BITS64;
2171
2172                 if (it != -1) {
2173                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2174                         sok &= ix;
2175                     else
2176                         return NULL;
2177                 }
2178
2179                 if (bt != -1) {
2180                     if (REG_GPR & ~bx)
2181                         return NULL; /* Invalid register */
2182                     if (~sok & bx & SIZE_MASK)
2183                         return NULL; /* Invalid size */
2184                     sok &= bx;
2185                 }
2186
2187                 /* While we're here, ensure the user didn't specify
2188                    WORD or QWORD. */
2189                 if (input->disp_size == 16 || input->disp_size == 64)
2190                     return NULL;
2191
2192                 if (addrbits == 16 ||
2193                     (addrbits == 32 && !(sok & BITS32)) ||
2194                     (addrbits == 64 && !(sok & BITS64)))
2195                     return NULL;
2196
2197                 /* now reorganize base/index */
2198                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2199                     ((hb == b && ht == EAH_NOTBASE)
2200                      || (hb == i && ht == EAH_MAKEBASE))) {
2201                     /* swap if hints say so */
2202                     t = bt, bt = it, it = t;
2203                     t = bx, bx = ix, ix = t;
2204                 }
2205                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2206                     bt = -1, bx = 0, s++;
2207                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2208                     /* make single reg base, unless hint */
2209                     bt = it, bx = ix, it = -1, ix = 0;
2210                 }
2211                 if (((s == 2 && it != REG_NUM_ESP
2212                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2213                      || s == 5 || s == 9) && bt == -1)
2214                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2215                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2216                     && (input->eaflags & EAF_TIMESTWO))
2217                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2218                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2219                 if (s == 1 && it == REG_NUM_ESP) {
2220                     /* swap ESP into base if scale is 1 */
2221                     t = it, it = bt, bt = t;
2222                     t = ix, ix = bx, bx = t;
2223                 }
2224                 if (it == REG_NUM_ESP
2225                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2226                     return NULL;        /* wrong, for various reasons */
2227
2228                 output->rex |= rexflags(it, ix, REX_X);
2229                 output->rex |= rexflags(bt, bx, REX_B);
2230
2231                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2232                     /* no SIB needed */
2233                     int mod, rm;
2234
2235                     if (bt == -1) {
2236                         rm = 5;
2237                         mod = 0;
2238                     } else {
2239                         rm = (bt & 7);
2240                         if (rm != REG_NUM_EBP && o == 0 &&
2241                                 seg == NO_SEG && !forw_ref &&
2242                                 !(input->eaflags &
2243                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2244                             mod = 0;
2245                         else if (input->eaflags & EAF_BYTEOFFS ||
2246                                  (o >= -128 && o <= 127 && seg == NO_SEG
2247                                   && !forw_ref
2248                                   && !(input->eaflags & EAF_WORDOFFS)))
2249                             mod = 1;
2250                         else
2251                             mod = 2;
2252                     }
2253
2254                     output->sib_present = false;
2255                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2256                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2257                 } else {
2258                     /* we need a SIB */
2259                     int mod, scale, index, base;
2260
2261                     if (it == -1)
2262                         index = 4, s = 1;
2263                     else
2264                         index = (it & 7);
2265
2266                     switch (s) {
2267                     case 1:
2268                         scale = 0;
2269                         break;
2270                     case 2:
2271                         scale = 1;
2272                         break;
2273                     case 4:
2274                         scale = 2;
2275                         break;
2276                     case 8:
2277                         scale = 3;
2278                         break;
2279                     default:   /* then what the smeg is it? */
2280                         return NULL;    /* panic */
2281                     }
2282
2283                     if (bt == -1) {
2284                         base = 5;
2285                         mod = 0;
2286                     } else {
2287                         base = (bt & 7);
2288                         if (base != REG_NUM_EBP && o == 0 &&
2289                                     seg == NO_SEG && !forw_ref &&
2290                                     !(input->eaflags &
2291                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2292                             mod = 0;
2293                         else if (input->eaflags & EAF_BYTEOFFS ||
2294                                  (o >= -128 && o <= 127 && seg == NO_SEG
2295                                   && !forw_ref
2296                                   && !(input->eaflags & EAF_WORDOFFS)))
2297                             mod = 1;
2298                         else
2299                             mod = 2;
2300                     }
2301
2302                     output->sib_present = true;
2303                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2304                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2305                     output->sib = (scale << 6) | (index << 3) | base;
2306                 }
2307             } else {            /* it's 16-bit */
2308                 int mod, rm;
2309
2310                 /* check for 64-bit long mode */
2311                 if (addrbits == 64)
2312                     return NULL;
2313
2314                 /* check all registers are BX, BP, SI or DI */
2315                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2316                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2317                                        && i != R_SI && i != R_DI))
2318                     return NULL;
2319
2320                 /* ensure the user didn't specify DWORD/QWORD */
2321                 if (input->disp_size == 32 || input->disp_size == 64)
2322                     return NULL;
2323
2324                 if (s != 1 && i != -1)
2325                     return NULL;        /* no can do, in 16-bit EA */
2326                 if (b == -1 && i != -1) {
2327                     int tmp = b;
2328                     b = i;
2329                     i = tmp;
2330                 }               /* swap */
2331                 if ((b == R_SI || b == R_DI) && i != -1) {
2332                     int tmp = b;
2333                     b = i;
2334                     i = tmp;
2335                 }
2336                 /* have BX/BP as base, SI/DI index */
2337                 if (b == i)
2338                     return NULL;        /* shouldn't ever happen, in theory */
2339                 if (i != -1 && b != -1 &&
2340                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2341                     return NULL;        /* invalid combinations */
2342                 if (b == -1)    /* pure offset: handled above */
2343                     return NULL;        /* so if it gets to here, panic! */
2344
2345                 rm = -1;
2346                 if (i != -1)
2347                     switch (i * 256 + b) {
2348                     case R_SI * 256 + R_BX:
2349                         rm = 0;
2350                         break;
2351                     case R_DI * 256 + R_BX:
2352                         rm = 1;
2353                         break;
2354                     case R_SI * 256 + R_BP:
2355                         rm = 2;
2356                         break;
2357                     case R_DI * 256 + R_BP:
2358                         rm = 3;
2359                         break;
2360                 } else
2361                     switch (b) {
2362                     case R_SI:
2363                         rm = 4;
2364                         break;
2365                     case R_DI:
2366                         rm = 5;
2367                         break;
2368                     case R_BP:
2369                         rm = 6;
2370                         break;
2371                     case R_BX:
2372                         rm = 7;
2373                         break;
2374                     }
2375                 if (rm == -1)   /* can't happen, in theory */
2376                     return NULL;        /* so panic if it does */
2377
2378                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2379                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2380                     mod = 0;
2381                 else if (input->eaflags & EAF_BYTEOFFS ||
2382                          (o >= -128 && o <= 127 && seg == NO_SEG
2383                           && !forw_ref
2384                           && !(input->eaflags & EAF_WORDOFFS)))
2385                     mod = 1;
2386                 else
2387                     mod = 2;
2388
2389                 output->sib_present = false;    /* no SIB - it's 16-bit */
2390                 output->bytes = mod;    /* bytes of offset needed */
2391                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2392             }
2393         }
2394     }
2395
2396     output->size = 1 + output->sib_present + output->bytes;
2397     return output;
2398 }
2399
2400 static void add_asp(insn *ins, int addrbits)
2401 {
2402     int j, valid;
2403     int defdisp;
2404
2405     valid = (addrbits == 64) ? 64|32 : 32|16;
2406
2407     switch (ins->prefixes[PPS_ASIZE]) {
2408     case P_A16:
2409         valid &= 16;
2410         break;
2411     case P_A32:
2412         valid &= 32;
2413         break;
2414     case P_A64:
2415         valid &= 64;
2416         break;
2417     case P_ASP:
2418         valid &= (addrbits == 32) ? 16 : 32;
2419         break;
2420     default:
2421         break;
2422     }
2423
2424     for (j = 0; j < ins->operands; j++) {
2425         if (!(MEMORY & ~ins->oprs[j].type)) {
2426             int32_t i, b;
2427
2428             /* Verify as Register */
2429             if (ins->oprs[j].indexreg < EXPR_REG_START
2430                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2431                 i = 0;
2432             else
2433                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2434
2435             /* Verify as Register */
2436             if (ins->oprs[j].basereg < EXPR_REG_START
2437                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2438                 b = 0;
2439             else
2440                 b = nasm_reg_flags[ins->oprs[j].basereg];
2441
2442             if (ins->oprs[j].scale == 0)
2443                 i = 0;
2444
2445             if (!i && !b) {
2446                 int ds = ins->oprs[j].disp_size;
2447                 if ((addrbits != 64 && ds > 8) ||
2448                     (addrbits == 64 && ds == 16))
2449                     valid &= ds;
2450             } else {
2451                 if (!(REG16 & ~b))
2452                     valid &= 16;
2453                 if (!(REG32 & ~b))
2454                     valid &= 32;
2455                 if (!(REG64 & ~b))
2456                     valid &= 64;
2457
2458                 if (!(REG16 & ~i))
2459                     valid &= 16;
2460                 if (!(REG32 & ~i))
2461                     valid &= 32;
2462                 if (!(REG64 & ~i))
2463                     valid &= 64;
2464             }
2465         }
2466     }
2467
2468     if (valid & addrbits) {
2469         ins->addr_size = addrbits;
2470     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2471         /* Add an address size prefix */
2472         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2473         ins->prefixes[PPS_ASIZE] = pref;
2474         ins->addr_size = (addrbits == 32) ? 16 : 32;
2475     } else {
2476         /* Impossible... */
2477         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2478         ins->addr_size = addrbits; /* Error recovery */
2479     }
2480
2481     defdisp = ins->addr_size == 16 ? 16 : 32;
2482
2483     for (j = 0; j < ins->operands; j++) {
2484         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2485             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2486             != ins->addr_size) {
2487             /* mem_offs sizes must match the address size; if not,
2488                strip the MEM_OFFS bit and match only EA instructions */
2489             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2490         }
2491     }
2492 }