assemble.c

   1 /* assemble.c   code generation for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the license given in the file "LICENSE"
   6  * distributed in the NASM archive.
   7  *
   8  * the actual codes (C syntax, i.e. octal):
   9  * \0            - terminates the code. (Unless it's a literal of course.)
  10  * \1, \2, \3    - that many literal bytes follow in the code stream
  11  * \4, \6        - the POP/PUSH (respectively) codes for CS, DS, ES, SS
  12  *                 (POP is never used for CS) depending on operand 0
  13  * \5, \7        - the second byte of POP/PUSH codes for FS, GS, depending
  14  *                 on operand 0
  15  * \10..\13      - a literal byte follows in the code stream, to be added
  16  *                 to the register value of operand 0..3
  17  * \14..\17      - a signed byte immediate operand, from operand 0..3
  18  * \20..\23      - a byte immediate operand, from operand 0..3
  19  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  20  * \30..\33      - a word immediate operand, from operand 0..3
  21  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  22  *                 assembly mode or the operand-size override on the operand
  23  * \40..\43      - a long immediate operand, from operand 0..3
  24  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  25  *                 depending on the address size of the instruction.
  26  * \50..\53      - a byte relative operand, from operand 0..3
  27  * \54..\57      - a qword immediate operand, from operand 0..3
  28  * \60..\63      - a word relative operand, from operand 0..3
  29  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  30  *                 assembly mode or the operand-size override on the operand
  31  * \70..\73      - a long relative operand, from operand 0..3
  32  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  33  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  34  *                 field the register value of operand b.
  35  * \140..\143    - an immediate word or signed byte for operand 0..3
  36  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  37  *                  is a signed byte rather than a word.  Opcode byte follows.
  38  * \150..\153    - an immediate dword or signed byte for operand 0..3
  39  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  40  *                  is a signed byte rather than a dword.  Opcode byte follows.
  41  * \160..\163    - this instruction uses DREX rather than REX, with the
  42  *                 OC0 field set to 0, and the dest field taken from
  43  *                 operand 0..3.
  44  * \164..\167    - this instruction uses DREX rather than REX, with the
  45  *                 OC0 field set to 1, and the dest field taken from
  46  *                 operand 0..3.
  47  * \171          - placement of DREX suffix in the absence of an EA
  48  * \172\ab       - the register number from operand a in bits 7..4, with
  49  *                 the 4-bit immediate from operand b in bits 3..0.
  50  * \173\xab      - the register number from operand a in bits 7..4, with
  51  *                 the value b in bits 3..0.
  52  * \174\a        - the register number from operand a in bits 7..4, and
  53  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  54  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  55  *                 field equal to digit b.
  56  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  57  *                 is not equal to the truncated and sign-extended 32-bit
  58  *                 operand; used for 32-bit immediates in 64-bit mode.
  59  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  60  * \260..\263    - this instruction uses VEX rather than REX, with the
  61  *                 V field taken from operand 0..3.
  62  * \270          - this instruction uses VEX rather than REX, with the
  63  *                 V field set to 1111b.
  64  *
  65  * VEX prefixes are followed by the sequence:
  66  * \mm\wlp         where mm is the M field; and wlp is:
  67  *                 00 0ww lpp
  68  *                 [w0] ww = 0 for W = 0
  69  *                 [w1] ww = 1 for W = 1
  70  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  71  *                 [ww] ww = 3 for W used as REX.W
  72  *
  73  *
  74  * \274..\277    - a signed byte immediate operand, from operand 0..3,
  75  *                 which is to be extended to the operand size.
  76  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  77  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  78  * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
  79  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  80  * \314          - (disassembler only) invalid with REX.B
  81  * \315          - (disassembler only) invalid with REX.X
  82  * \316          - (disassembler only) invalid with REX.R
  83  * \317          - (disassembler only) invalid with REX.W
  84  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
  85  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
  86  * \322          - indicates that this instruction is only valid when the
  87  *                 operand size is the default (instruction to disassembler,
  88  *                 generates no code in the assembler)
  89  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
  90  * \324          - indicates 64-bit operand size requiring REX prefix.
  91  * \330          - a literal byte follows in the code stream, to be added
  92  *                 to the condition code value of the instruction.
  93  * \331          - instruction not valid with REP prefix.  Hint for
  94  *                 disassembler only; for SSE instructions.
  95  * \332          - REP prefix (0xF2 byte) used as opcode extension.
  96  * \333          - REP prefix (0xF3 byte) used as opcode extension.
  97  * \334          - LOCK prefix used instead of REX.R
  98  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
   99  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  100  * \337          - force a REPNE prefix (0xF2) even if not specified.
 101  *                 \336-\337 are still listed as prefixes in the disassembler.
 102  * \340          - reserve <operand 0> bytes of uninitialized storage.
 103  *                 Operand 0 had better be a segmentless constant.
 104  * \360          - no SSE prefix (== \364\331)
 105  * \361          - 66 SSE prefix (== \366\331)
 106  * \362          - F2 SSE prefix (== \364\332)
 107  * \363          - F3 SSE prefix (== \364\333)
 108  * \364          - operand-size prefix (0x66) not permitted
 109  * \365          - address-size prefix (0x67) not permitted
 110  * \366          - operand-size prefix (0x66) used as opcode extension
 111  * \367          - address-size prefix (0x67) used as opcode extension
 112  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 113  *                 370 is used for Jcc, 371 is used for JMP.
 114  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 115  *                 used for conditional jump over longer jump
 116  */
 117
 118 #include "compiler.h"
 119
 120 #include <stdio.h>
 121 #include <string.h>
 122 #include <inttypes.h>
 123
 124 #include "nasm.h"
 125 #include "nasmlib.h"
 126 #include "assemble.h"
 127 #include "insns.h"
 128 #include "tables.h"
 129
 130 /* Initialized to zero by the C standard */
 131 static const uint8_t const_zero_buf[256];
 132
 133 typedef struct {
 134     int sib_present;                 /* is a SIB byte necessary? */
 135     int bytes;                       /* # of bytes of offset needed */
 136     int size;                        /* lazy - this is sib+bytes+1 */
 137     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 138 } ea;
 139
 140 static uint32_t cpu;            /* cpu level received from nasm.c */
 141 static efunc errfunc;
 142 static struct ofmt *outfmt;
 143 static ListGen *list;
 144
 145 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 146 static void gencode(int32_t segment, int64_t offset, int bits,
 147                     insn * ins, const struct itemplate *temp,
 148                     int64_t insn_end);
 149 static int matches(const struct itemplate *, insn *, int bits);
 150 static int32_t regflag(const operand *);
 151 static int32_t regval(const operand *);
 152 static int rexflags(int, int32_t, int);
 153 static int op_rexflags(const operand *, int);
 154 static ea *process_ea(operand *, ea *, int, int, int, int32_t);
 155 static void add_asp(insn *, int);
 156
 157 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 158 {
 159     return ins->prefixes[pos] == prefix;
 160 }
 161
 162 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 163 {
 164     if (ins->prefixes[pos])
 165         errfunc(ERR_NONFATAL, "invalid %s prefix",
 166                 prefix_name(ins->prefixes[pos]));
 167 }
 168
 169 static const char *size_name(int size)
 170 {
 171     switch (size) {
 172     case 1:
 173         return "byte";
 174     case 2:
 175         return "word";
 176     case 4:
 177         return "dword";
 178     case 8:
 179         return "qword";
 180     case 10:
 181         return "tword";
 182     case 16:
 183         return "oword";
 184     case 32:
 185         return "yword";
 186     default:
 187         return "???";
 188     }
 189 }
 190
 191 static void warn_overflow(int size, const struct operand *o)
 192 {
 193     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 194         int64_t lim = ((int64_t)1 << (size*8))-1;
 195         int64_t data = o->offset;
 196
 197         if (data < ~lim || data > lim)
 198             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 199                     "%s data exceeds bounds", size_name(size));
 200     }
 201 }
 202 /*
 203  * This routine wrappers the real output format's output routine,
 204  * in order to pass a copy of the data off to the listing file
 205  * generator at the same time.
 206  */
 207 static void out(int64_t offset, int32_t segto, const void *data,
 208                 enum out_type type, uint64_t size,
 209                 int32_t segment, int32_t wrt)
 210 {
 211     static int32_t lineno = 0;     /* static!!! */
 212     static char *lnfname = NULL;
 213     uint8_t p[8];
 214
 215     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 216         /*
 217          * This is a non-relocated address, and we're going to
 218          * convert it into RAWDATA format.
 219          */
 220         uint8_t *q = p;
 221
 222         if (size > 8) {
 223             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 224             return;
 225         }
 226
 227         WRITEADDR(q, *(int64_t *)data, size);
 228         data = p;
 229         type = OUT_RAWDATA;
 230     }
 231
 232     list->output(offset, data, type, size);
 233
 234     /*
 235      * this call to src_get determines when we call the
 236      * debug-format-specific "linenum" function
 237      * it updates lineno and lnfname to the current values
 238      * returning 0 if "same as last time", -2 if lnfname
 239      * changed, and the amount by which lineno changed,
 240      * if it did. thus, these variables must be static
 241      */
 242
 243     if (src_get(&lineno, &lnfname)) {
 244         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 245     }
 246
 247     outfmt->output(segto, data, type, size, segment, wrt);
 248 }
 249
 250 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 251                      insn * ins, const uint8_t *code)
 252 {
 253     int64_t isize;
 254     uint8_t c = code[0];
 255
 256     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 257         return false;
 258     if (!optimizing)
 259         return false;
 260     if (optimizing < 0 && c == 0371)
 261         return false;
 262
 263     isize = calcsize(segment, offset, bits, ins, code);
 264     if (ins->oprs[0].segment != segment)
 265         return false;
 266
 267     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 268     return (isize >= -128 && isize <= 127); /* is it byte size? */
 269 }
 270
 271 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 272               insn * instruction, struct ofmt *output, efunc error,
 273               ListGen * listgen)
 274 {
 275     const struct itemplate *temp;
 276     int j;
 277     int size_prob;
 278     int64_t insn_end;
 279     int32_t itimes;
 280     int64_t start = offset;
 281     int64_t wsize = 0;             /* size for DB etc. */
 282
 283     errfunc = error;            /* to pass to other functions */
 284     cpu = cp;
 285     outfmt = output;            /* likewise */
 286     list = listgen;             /* and again */
 287
 288     switch (instruction->opcode) {
 289     case -1:
 290         return 0;
 291     case I_DB:
 292         wsize = 1;
 293         break;
 294     case I_DW:
 295         wsize = 2;
 296         break;
 297     case I_DD:
 298         wsize = 4;
 299         break;
 300     case I_DQ:
 301         wsize = 8;
 302         break;
 303     case I_DT:
 304         wsize = 10;
 305         break;
 306     case I_DO:
 307         wsize = 16;
 308         break;
 309     case I_DY:
 310         wsize = 32;
 311         break;
 312     default:
 313         break;
 314     }
 315
 316     if (wsize) {
 317         extop *e;
 318         int32_t t = instruction->times;
 319         if (t < 0)
 320             errfunc(ERR_PANIC,
 321                     "instruction->times < 0 (%ld) in assemble()", t);
 322
 323         while (t--) {           /* repeat TIMES times */
 324             for (e = instruction->eops; e; e = e->next) {
 325                 if (e->type == EOT_DB_NUMBER) {
 326                     if (wsize == 1) {
 327                         if (e->segment != NO_SEG)
 328                             errfunc(ERR_NONFATAL,
 329                                     "one-byte relocation attempted");
 330                         else {
 331                             uint8_t out_byte = e->offset;
 332                             out(offset, segment, &out_byte,
 333                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 334                         }
 335                     } else if (wsize > 8) {
 336                         errfunc(ERR_NONFATAL,
 337                                 "integer supplied to a DT, DO or DY"
 338                                 " instruction");
 339                     } else
 340                         out(offset, segment, &e->offset,
 341                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 342                     offset += wsize;
 343                 } else if (e->type == EOT_DB_STRING ||
 344                            e->type == EOT_DB_STRING_FREE) {
 345                     int align;
 346
 347                     out(offset, segment, e->stringval,
 348                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 349                     align = e->stringlen % wsize;
 350
 351                     if (align) {
 352                         align = wsize - align;
 353                         out(offset, segment, const_zero_buf,
 354                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 355                     }
 356                     offset += e->stringlen + align;
 357                 }
 358             }
 359             if (t > 0 && t == instruction->times - 1) {
 360                 /*
 361                  * Dummy call to list->output to give the offset to the
 362                  * listing module.
 363                  */
 364                 list->output(offset, NULL, OUT_RAWDATA, 0);
 365                 list->uplevel(LIST_TIMES);
 366             }
 367         }
 368         if (instruction->times > 1)
 369             list->downlevel(LIST_TIMES);
 370         return offset - start;
 371     }
 372
 373     if (instruction->opcode == I_INCBIN) {
 374         const char *fname = instruction->eops->stringval;
 375         FILE *fp;
 376
 377         fp = fopen(fname, "rb");
 378         if (!fp) {
 379             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 380                   fname);
 381         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 382             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 383                   fname);
 384         } else {
 385             static char buf[4096];
 386             size_t t = instruction->times;
 387             size_t base = 0;
 388             size_t len;
 389
 390             len = ftell(fp);
 391             if (instruction->eops->next) {
 392                 base = instruction->eops->next->offset;
 393                 len -= base;
 394                 if (instruction->eops->next->next &&
 395                     len > (size_t)instruction->eops->next->next->offset)
 396                     len = (size_t)instruction->eops->next->next->offset;
 397             }
 398             /*
 399              * Dummy call to list->output to give the offset to the
 400              * listing module.
 401              */
 402             list->output(offset, NULL, OUT_RAWDATA, 0);
 403             list->uplevel(LIST_INCBIN);
 404             while (t--) {
 405                 size_t l;
 406
 407                 fseek(fp, base, SEEK_SET);
 408                 l = len;
 409                 while (l > 0) {
 410                     int32_t m =
 411                         fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
 412                               fp);
 413                     if (!m) {
 414                         /*
 415                          * This shouldn't happen unless the file
 416                          * actually changes while we are reading
 417                          * it.
 418                          */
 419                         error(ERR_NONFATAL,
 420                               "`incbin': unexpected EOF while"
 421                               " reading file `%s'", fname);
 422                         t = 0;  /* Try to exit cleanly */
 423                         break;
 424                     }
 425                     out(offset, segment, buf, OUT_RAWDATA, m,
 426                         NO_SEG, NO_SEG);
 427                     l -= m;
 428                 }
 429             }
 430             list->downlevel(LIST_INCBIN);
 431             if (instruction->times > 1) {
 432                 /*
 433                  * Dummy call to list->output to give the offset to the
 434                  * listing module.
 435                  */
 436                 list->output(offset, NULL, OUT_RAWDATA, 0);
 437                 list->uplevel(LIST_TIMES);
 438                 list->downlevel(LIST_TIMES);
 439             }
 440             fclose(fp);
 441             return instruction->times * len;
 442         }
 443         return 0;               /* if we're here, there's an error */
 444     }
 445
 446     /* Check to see if we need an address-size prefix */
 447     add_asp(instruction, bits);
 448
 449     size_prob = false;
 450
 451     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 452         int m = matches(temp, instruction, bits);
 453         if (m == 100 ||
 454             (m == 99 && jmp_match(segment, offset, bits,
 455                                   instruction, temp->code))) {
 456             /* Matches! */
 457             int64_t insn_size = calcsize(segment, offset, bits,
 458                                       instruction, temp->code);
 459             itimes = instruction->times;
 460             if (insn_size < 0)  /* shouldn't be, on pass two */
 461                 error(ERR_PANIC, "errors made it through from pass one");
 462             else
 463                 while (itimes--) {
 464                     for (j = 0; j < MAXPREFIX; j++) {
 465                         uint8_t c = 0;
 466                         switch (instruction->prefixes[j]) {
 467                         case P_LOCK:
 468                             c = 0xF0;
 469                             break;
 470                         case P_REPNE:
 471                         case P_REPNZ:
 472                             c = 0xF2;
 473                             break;
 474                         case P_REPE:
 475                         case P_REPZ:
 476                         case P_REP:
 477                             c = 0xF3;
 478                             break;
 479                         case R_CS:
 480                             if (bits == 64) {
 481                                 error(ERR_WARNING | ERR_PASS2,
 482                                       "cs segment base generated, but will be ignored in 64-bit mode");
 483                             }
 484                             c = 0x2E;
 485                             break;
 486                         case R_DS:
 487                             if (bits == 64) {
 488                                 error(ERR_WARNING | ERR_PASS2,
 489                                       "ds segment base generated, but will be ignored in 64-bit mode");
 490                             }
 491                             c = 0x3E;
 492                             break;
 493                         case R_ES:
 494                            if (bits == 64) {
 495                                 error(ERR_WARNING | ERR_PASS2,
 496                                       "es segment base generated, but will be ignored in 64-bit mode");
 497                            }
 498                             c = 0x26;
 499                             break;
 500                         case R_FS:
 501                             c = 0x64;
 502                             break;
 503                         case R_GS:
 504                             c = 0x65;
 505                             break;
 506                         case R_SS:
 507                             if (bits == 64) {
 508                                 error(ERR_WARNING | ERR_PASS2,
 509                                       "ss segment base generated, but will be ignored in 64-bit mode");
 510                             }
 511                             c = 0x36;
 512                             break;
 513                         case R_SEGR6:
 514                         case R_SEGR7:
 515                             error(ERR_NONFATAL,
 516                                   "segr6 and segr7 cannot be used as prefixes");
 517                             break;
 518                         case P_A16:
 519                             if (bits == 64) {
 520                                 error(ERR_NONFATAL,
 521                                       "16-bit addressing is not supported "
 522                                       "in 64-bit mode");
 523                             } else if (bits != 16)
 524                                 c = 0x67;
 525                             break;
 526                         case P_A32:
 527                             if (bits != 32)
 528                                 c = 0x67;
 529                             break;
 530                         case P_A64:
 531                             if (bits != 64) {
 532                                 error(ERR_NONFATAL,
 533                                       "64-bit addressing is only supported "
 534                                       "in 64-bit mode");
 535                             }
 536                             break;
 537                         case P_ASP:
 538                             c = 0x67;
 539                             break;
 540                         case P_O16:
 541                             if (bits != 16)
 542                                 c = 0x66;
 543                             break;
 544                         case P_O32:
 545                             if (bits == 16)
 546                                 c = 0x66;
 547                             break;
 548                         case P_O64:
 549                             /* REX.W */
 550                             break;
 551                         case P_OSP:
 552                             c = 0x66;
 553                             break;
 554                         case P_none:
 555                             break;
 556                         default:
 557                             error(ERR_PANIC, "invalid instruction prefix");
 558                         }
 559                         if (c != 0) {
 560                             out(offset, segment, &c, OUT_RAWDATA, 1,
 561                                 NO_SEG, NO_SEG);
 562                             offset++;
 563                         }
 564                     }
 565                     insn_end = offset + insn_size;
 566                     gencode(segment, offset, bits, instruction,
 567                             temp, insn_end);
 568                     offset += insn_size;
 569                     if (itimes > 0 && itimes == instruction->times - 1) {
 570                         /*
 571                          * Dummy call to list->output to give the offset to the
 572                          * listing module.
 573                          */
 574                         list->output(offset, NULL, OUT_RAWDATA, 0);
 575                         list->uplevel(LIST_TIMES);
 576                     }
 577                 }
 578             if (instruction->times > 1)
 579                 list->downlevel(LIST_TIMES);
 580             return offset - start;
 581         } else if (m > 0 && m > size_prob) {
 582             size_prob = m;
 583         }
 584     }
 585
 586     if (temp->opcode == -1) {   /* didn't match any instruction */
 587         switch (size_prob) {
 588         case 1:
 589             error(ERR_NONFATAL, "operation size not specified");
 590             break;
 591         case 2:
 592             error(ERR_NONFATAL, "mismatch in operand sizes");
 593             break;
 594         case 3:
 595             error(ERR_NONFATAL, "no instruction for this cpu level");
 596             break;
 597         case 4:
 598             error(ERR_NONFATAL, "instruction not supported in 64-bit mode");
 599             break;
 600         default:
 601             error(ERR_NONFATAL,
 602                   "invalid combination of opcode and operands");
 603             break;
 604         }
 605     }
 606     return 0;
 607 }
 608
 609 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 610                insn * instruction, efunc error)
 611 {
 612     const struct itemplate *temp;
 613
 614     errfunc = error;            /* to pass to other functions */
 615     cpu = cp;
 616
 617     if (instruction->opcode == -1)
 618         return 0;
 619
 620     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 621         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 622         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 623         instruction->opcode == I_DY) {
 624         extop *e;
 625         int32_t isize, osize, wsize = 0;   /* placate gcc */
 626
 627         isize = 0;
 628         switch (instruction->opcode) {
 629         case I_DB:
 630             wsize = 1;
 631             break;
 632         case I_DW:
 633             wsize = 2;
 634             break;
 635         case I_DD:
 636             wsize = 4;
 637             break;
 638         case I_DQ:
 639             wsize = 8;
 640             break;
 641         case I_DT:
 642             wsize = 10;
 643             break;
 644         case I_DO:
 645             wsize = 16;
 646             break;
 647         case I_DY:
 648             wsize = 32;
 649             break;
 650         default:
 651             break;
 652         }
 653
 654         for (e = instruction->eops; e; e = e->next) {
 655             int32_t align;
 656
 657             osize = 0;
 658             if (e->type == EOT_DB_NUMBER)
 659                 osize = 1;
 660             else if (e->type == EOT_DB_STRING ||
 661                      e->type == EOT_DB_STRING_FREE)
 662                 osize = e->stringlen;
 663
 664             align = (-osize) % wsize;
 665             if (align < 0)
 666                 align += wsize;
 667             isize += osize + align;
 668         }
 669         return isize * instruction->times;
 670     }
 671
 672     if (instruction->opcode == I_INCBIN) {
 673         const char *fname = instruction->eops->stringval;
 674         FILE *fp;
 675         size_t len;
 676
 677         fp = fopen(fname, "rb");
 678         if (!fp)
 679             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 680                   fname);
 681         else if (fseek(fp, 0L, SEEK_END) < 0)
 682             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 683                   fname);
 684         else {
 685             len = ftell(fp);
 686             fclose(fp);
 687             if (instruction->eops->next) {
 688                 len -= instruction->eops->next->offset;
 689                 if (instruction->eops->next->next &&
 690                     len > (size_t)instruction->eops->next->next->offset) {
 691                     len = (size_t)instruction->eops->next->next->offset;
 692                 }
 693             }
 694             return instruction->times * len;
 695         }
 696         return 0;               /* if we're here, there's an error */
 697     }
 698
 699     /* Check to see if we need an address-size prefix */
 700     add_asp(instruction, bits);
 701
 702     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 703         int m = matches(temp, instruction, bits);
 704         if (m == 100 ||
 705             (m == 99 && jmp_match(segment, offset, bits,
 706                                   instruction, temp->code))) {
 707             /* we've matched an instruction. */
 708             int64_t isize;
 709             const uint8_t *codes = temp->code;
 710             int j;
 711
 712             isize = calcsize(segment, offset, bits, instruction, codes);
 713             if (isize < 0)
 714                 return -1;
 715             for (j = 0; j < MAXPREFIX; j++) {
 716                 switch (instruction->prefixes[j]) {
 717                 case P_A16:
 718                     if (bits != 16)
 719                         isize++;
 720                     break;
 721                 case P_A32:
 722                     if (bits != 32)
 723                         isize++;
 724                     break;
 725                 case P_O16:
 726                     if (bits != 16)
 727                         isize++;
 728                     break;
 729                 case P_O32:
 730                     if (bits == 16)
 731                         isize++;
 732                     break;
 733                 case P_A64:
 734                 case P_O64:
 735                 case P_none:
 736                     break;
 737                 default:
 738                     isize++;
 739                     break;
 740                 }
 741             }
 742             return isize * instruction->times;
 743         }
 744     }
 745     return -1;                  /* didn't match any instruction */
 746 }
 747
     /*
      * Decide whether an immediate operand is eligible at all for the short
      * signed-byte encoding.  Every one of the following must hold:
      *   - the operand has no WRT part and no segment (both are NO_SEG);
      *   - OPFLAG_FORWARD is not set in its opflags;
      *   - the global 'optimizing' setting is >= 0;
      *   - STRICT is not set in its type.
      * Only eligibility is tested here; the value range is checked by the
      * callers is_sbyte16() and is_sbyte32().
      */
 748 static bool possible_sbyte(operand *o)
 749 {
 750     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 751         !(o->opflags & OPFLAG_FORWARD) &&
 752         optimizing >= 0 && !(o->type & STRICT);
 753 }
 754
 755 /*
      * Return true when operand 'o' passes possible_sbyte() and its offset,
      * after being narrowed to int16_t, lies in the signed byte range
      * -128..127.  calcsize() and gencode() use this to pick the one-byte
      * form of a word-sized immediate.
      */
 756 static bool is_sbyte16(operand *o)
 757 {
 758     int16_t v;
 759
 760     if (!possible_sbyte(o))
 761         return false;
 762
     /*
      * The assignment narrows the offset to 16 bits, so only the low word
      * takes part in the range test (on the usual two's-complement hosts
      * 0xFFFF is therefore seen as -1).
      */
 763     v = o->offset;
 764     return v >= -128 && v <= 127;
 765 }
 766
     /*
      * Return true when operand 'o' passes possible_sbyte() and its offset,
      * after being narrowed to int32_t, lies in the signed byte range
      * -128..127.  Same test as is_sbyte16(), but over the low 32 bits of
      * the offset.
      */
 767 static bool is_sbyte32(operand *o)
 768 {
 769     int32_t v;
 770
 771     if (!possible_sbyte(o))
 772         return false;
 773
     /* Narrow to 32 bits before the range test. */
 774     v = o->offset;
 775     return v >= -128 && v <= 127;
 776 }
 777
     /*
      * calcsize: compute the encoded length, in bytes, of one instruction by
      * interpreting the template byte codes in 'codes' (a 0 byte ends them).
      *
      *   segment, offset - not used here (explicitly voided below)
      *   bits            - assembly mode; compared against 16, 32 and 64
      *   ins             - the instruction being sized.  Besides being read,
      *                     its rex, drexdst, vex_m and vex_wlp fields and
      *                     prefixes[PPS_LREP] are written as a side effect.
      *   codes           - the template byte codes to interpret
      *
      * Returns the length, or -1 when the template cannot be used (invalid
      * effective address, a code that is not valid for this 'bits', or a
      * register combination rejected in the REX/DREX/VEX checks at the end).
      * Bytes for the prefixes listed in ins->prefixes[] are not added here;
      * the caller visible above this function counts those itself.
      */
 778 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 779                         insn * ins, const uint8_t *codes)
 780 {
 781     int64_t length = 0;
 782     uint8_t c;
 783     int rex_mask = ~0;
 784     struct operand *opx;
 785
 786     ins->rex = 0;               /* Ensure REX is reset */
 787
 788     if (ins->prefixes[PPS_OSIZE] == P_O64)
 789         ins->rex |= REX_W;
 790
 791     (void)segment;              /* Don't warn that this parameter is unused */
 792     (void)offset;               /* Don't warn that this parameter is unused */
 793
 794     while (*codes) {
 795         c = *codes++;
     /* For most codes the low two bits select the operand. */
 796         opx = &ins->oprs[c & 3];
 797         switch (c) {
             /* \1..\3: that many literal bytes follow; skip and count them */
 798         case 01:
 799         case 02:
 800         case 03:
 801             codes += c, length += c;
 802             break;
             /* \4..\7: one byte of a segment-register PUSH/POP encoding */
 803         case 04:
 804         case 05:
 805         case 06:
 806         case 07:
 807             length++;
 808             break;
             /* \10..\13: literal byte + register value; collect REX flags */
 809         case 010:
 810         case 011:
 811         case 012:
 812         case 013:
 813             ins->rex |=
 814                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 815             codes++, length++;
 816             break;
             /* \14..\27: the three flavours of one-byte immediate */
 817         case 014:
 818         case 015:
 819         case 016:
 820         case 017:
 821             length++;
 822             break;
 823         case 020:
 824         case 021:
 825         case 022:
 826         case 023:
 827             length++;
 828             break;
 829         case 024:
 830         case 025:
 831         case 026:
 832         case 027:
 833             length++;
 834             break;
             /* \30..\33: word immediate */
 835         case 030:
 836         case 031:
 837         case 032:
 838         case 033:
 839             length += 2;
 840             break;
             /* \34..\37: word or dword, by operand size override else 'bits' */
 841         case 034:
 842         case 035:
 843         case 036:
 844         case 037:
 845             if (opx->type & (BITS16 | BITS32 | BITS64))
 846                 length += (opx->type & BITS16) ? 2 : 4;
 847             else
 848                 length += (bits == 16) ? 2 : 4;
 849             break;
             /* \40..\43: long immediate */
 850         case 040:
 851         case 041:
 852         case 042:
 853         case 043:
 854             length += 4;
 855             break;
             /* \44..\47: immediate as wide as the address size (bits -> bytes) */
 856         case 044:
 857         case 045:
 858         case 046:
 859         case 047:
 860             length += ins->addr_size >> 3;
 861             break;
             /* \50..\53: byte relative operand */
 862         case 050:
 863         case 051:
 864         case 052:
 865         case 053:
 866             length++;
 867             break;
             /* \54..\57: qword immediate */
 868         case 054:
 869         case 055:
 870         case 056:
 871         case 057:
 872             length += 8; /* MOV reg64/imm */
 873             break;
             /* \60..\63: word relative operand */
 874         case 060:
 875         case 061:
 876         case 062:
 877         case 063:
 878             length += 2;
 879             break;
             /* \64..\67: word or long relative, chosen like \34..\37 */
 880         case 064:
 881         case 065:
 882         case 066:
 883         case 067:
 884             if (opx->type & (BITS16 | BITS32 | BITS64))
 885                 length += (opx->type & BITS16) ? 2 : 4;
 886             else
 887                 length += (bits == 16) ? 2 : 4;
 888             break;
             /* \70..\73: long relative operand */
 889         case 070:
 890         case 071:
 891         case 072:
 892         case 073:
 893             length += 4;
 894             break;
 895         case 074:
 896         case 075:
 897         case 076:
 898         case 077:
 899             length += 2;
 900             break;
             /* word immediate that shrinks to one byte when is_sbyte16() holds */
 901         case 0140:
 902         case 0141:
 903         case 0142:
 904         case 0143:
 905             length += is_sbyte16(opx) ? 1 : 2;
 906             break;
             /* one template byte is consumed and one output byte counted */
 907         case 0144:
 908         case 0145:
 909         case 0146:
 910         case 0147:
 911             codes++;
 912             length++;
 913             break;
             /* dword immediate that shrinks to one byte when is_sbyte32() holds */
 914         case 0150:
 915         case 0151:
 916         case 0152:
 917         case 0153:
 918             length += is_sbyte32(opx) ? 1 : 4;
 919             break;
 920         case 0154:
 921         case 0155:
 922         case 0156:
 923         case 0157:
 924             codes++;
 925             length++;
 926             break;
             /* DREX forms: flag REX_D and remember the destination register */
 927         case 0160:
 928         case 0161:
 929         case 0162:
 930         case 0163:
 931             length++;
 932             ins->rex |= REX_D;
 933             ins->drexdst = regval(opx);
 934             break;
 935         case 0164:
 936         case 0165:
 937         case 0166:
 938         case 0167:
 939             length++;
 940             ins->rex |= REX_D|REX_OC;
 941             ins->drexdst = regval(opx);
 942             break;
 943         case 0171:
 944             break;
 945         case 0172:
 946         case 0173:
 947         case 0174:
 948             codes++;
 949             length++;
 950             break;
 951         case 0250:
 952         case 0251:
 953         case 0252:
 954         case 0253:
 955             length += is_sbyte32(opx) ? 1 : 4;
 956             break;
 957         case 0254:
 958         case 0255:
 959         case 0256:
 960         case 0257:
 961             length += 4;
 962             break;
             /*
              * VEX forms: flag REX_V and read the two template bytes that
              * follow into vex_m and vex_wlp.  The prefix length itself is
              * added after the loop, once all REX bits are known.
              */
 963         case 0260:
 964         case 0261:
 965         case 0262:
 966         case 0263:
 967             ins->rex |= REX_V;
 968             ins->drexdst = regval(opx);
 969             ins->vex_m = *codes++;
 970             ins->vex_wlp = *codes++;
 971             break;
 972         case 0270:
 973             ins->rex |= REX_V;
 974             ins->drexdst = 0;
 975             ins->vex_m = *codes++;
 976             ins->vex_wlp = *codes++;
 977             break;
 978         case 0274:
 979         case 0275:
 980         case 0276:
 981         case 0277:
 982             length++;
 983             break;
 984         case 0300:
 985         case 0301:
 986         case 0302:
 987         case 0303:
 988             break;
             /*
              * 0310/0311: one byte when gencode() will have to emit the 0x67
              * address-size prefix itself, i.e. the mode differs and the
              * matching P_A16/P_A32 prefix is not already present.
              */
 989         case 0310:
 990             if (bits == 64)
 991                 return -1;
 992             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 993             break;
 994         case 0311:
 995             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 996             break;
 997         case 0312:
 998             break;
             /* 0313: only usable in 64-bit mode without an A16/A32 prefix */
 999         case 0313:
1000             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
1001                 has_prefix(ins, PPS_ASIZE, P_A32))
1002                 return -1;
1003             break;
1004         case 0314:
1005         case 0315:
1006         case 0316:
1007         case 0317:
1008             break;
             /* 0320/0321: one byte for the 0x66 prefix gencode() emits */
1009         case 0320:
1010             length += (bits != 16);
1011             break;
1012         case 0321:
1013             length += (bits == 16);
1014             break;
1015         case 0322:
1016             break;
             /* 0323: REX_W is stripped from ins->rex after the loop */
1017         case 0323:
1018             rex_mask &= ~REX_W;
1019             break;
1020         case 0324:
1021             ins->rex |= REX_W;
1022             break;
1023         case 0330:
1024             codes++, length++;
1025             break;
1026         case 0331:
1027             break;
1028         case 0332:
1029         case 0333:
1030             length++;
1031             break;
1032         case 0334:
1033             ins->rex |= REX_L;
1034             break;
1035         case 0335:
1036             break;
             /* 0336/0337: fill an empty LREP prefix slot with REP / REPNE */
1037         case 0336:
1038             if (!ins->prefixes[PPS_LREP])
1039                 ins->prefixes[PPS_LREP] = P_REP;
1040             break;
1041         case 0337:
1042             if (!ins->prefixes[PPS_LREP])
1043                 ins->prefixes[PPS_LREP] = P_REPNE;
1044             break;
             /* 0340: the length is the constant value of operand 0 */
1045         case 0340:
1046             if (ins->oprs[0].segment != NO_SEG)
1047                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1048                         " quantity of BSS space");
1049             else
1050                 length += ins->oprs[0].offset;
1051             break;
1052         case 0360:
1053             break;
1054         case 0361:
1055         case 0362:
1056         case 0363:
1057             length++;
1058             break;
1059         case 0364:
1060         case 0365:
1061             break;
1062         case 0366:
1063         case 0367:
1064             length++;
1065             break;
1066         case 0370:
1067         case 0371:
1068         case 0372:
1069             break;
1070         case 0373:
1071             length++;
1072             break;
1073         default:               /* can't do it by 'case' statements */
1074             if (c >= 0100 && c <= 0277) {       /* it's an EA */
1075                 ea ea_data;
1076                 int rfield;
1077                 int32_t rflags;
1078                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1079
1080                 if (c <= 0177) {
1081                     /* pick rfield from operand b */
1082                     rflags = regflag(&ins->oprs[c & 7]);
1083                     rfield = nasm_regvals[ins->oprs[c & 7].basereg];
1084                 } else {
                     /* the low three bits of the code are the field itself */
1085                     rflags = 0;
1086                     rfield = c & 7;
1087                 }
1088
1089                 if (!process_ea
1090                     (&ins->oprs[(c >> 3) & 7], &ea_data, bits,
1091                      ins->addr_size, rfield, rflags)) {
1092                     errfunc(ERR_NONFATAL, "invalid effective address");
1093                     return -1;
1094                 } else {
1095                     ins->rex |= ea_data.rex;
1096                     length += ea_data.size;
1097                 }
1098             } else {
1099                 errfunc(ERR_PANIC, "internal instruction table corrupt"
1100                         ": instruction code 0x%02X given", c);
1101             }
1102         }
1103     }
1104
     /* Apply any REX_W suppression requested by code 0323. */
1105     ins->rex &= rex_mask;
1106
     /*
      * Account for the prefix implied by the collected flags: VEX takes
      * precedence over DREX, which takes precedence over a plain REX.
      */
1107     if (ins->rex & REX_V) {
1108         int bad32 = REX_R|REX_W|REX_X|REX_B;
1109
1110         if (ins->rex & REX_H) {
1111             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1112             return -1;
1113         }
1114         switch (ins->vex_wlp & 030) {
1115         case 000:
1116         case 020:
1117             ins->rex &= ~REX_W;
1118             break;
1119         case 010:
1120             ins->rex |= REX_W;
1121             bad32 &= ~REX_W;
1122             break;
1123         case 030:
1124             /* Follow REX_W */
1125             break;
1126         }
1127
1128         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1129             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1130             return -1;
1131         }
         /*
          * This test must be the same one gencode() uses to choose between
          * the 3-byte (0xC4) and 2-byte (0xC5) VEX forms: the short form
          * carries R but cannot carry W, X or B, nor a vex_m other than 1.
          * Testing REX_R here instead of REX_X made the computed size
          * disagree with the bytes actually emitted.
          */
1132         if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
1133             length += 3;
1134         else
1135             length += 2;
1136     } else if (ins->rex & REX_D) {
1137         if (ins->rex & REX_H) {
1138             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1139             return -1;
1140         }
1141         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1142                            ins->drexdst > 7)) {
1143             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1144             return -1;
1145         }
1146         length++;
1147     } else if (ins->rex & REX_REAL) {
1148         if (ins->rex & REX_H) {
1149             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1150             return -1;
1151         } else if (bits == 64) {
1152             length++;
1153         } else if ((ins->rex & REX_L) &&
1154                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1155                    cpu >= IF_X86_64) {
1156             /* LOCK-as-REX.R */
1157             assert_no_prefix(ins, PPS_LREP);
1158             length++;
1159         } else {
1160             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1161             return -1;
1162         }
1163     }
1164
1165     return length;
1166 }
1167
1168 #define EMIT_REX()                                                      \
1169     if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) && (bits == 64)) { \
1170         ins->rex = (ins->rex & REX_REAL)|REX_P;                         \
1171         out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
1172         ins->rex = 0;                                                   \
1173         offset += 1; \
1174     }
1175
1176 static void gencode(int32_t segment, int64_t offset, int bits,
1177                     insn * ins, const struct itemplate *temp,
1178                     int64_t insn_end)
1179 {
1180     static char condval[] = {   /* conditional opcodes */
1181         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1182         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1183         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1184     };
1185     uint8_t c;
1186     uint8_t bytes[4];
1187     int64_t size;
1188     int64_t data;
1189     struct operand *opx;
1190     const uint8_t *codes = temp->code;
1191
1192     while (*codes) {
1193         c = *codes++;
1194         opx = &ins->oprs[c & 3];
1195         switch (c) {
1196         case 01:
1197         case 02:
1198         case 03:
1199             EMIT_REX();
1200             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1201             codes += c;
1202             offset += c;
1203             break;
1204
1205         case 04:
1206         case 06:
1207             switch (ins->oprs[0].basereg) {
1208             case R_CS:
1209                 bytes[0] = 0x0E + (c == 0x04 ? 1 : 0);
1210                 break;
1211             case R_DS:
1212                 bytes[0] = 0x1E + (c == 0x04 ? 1 : 0);
1213                 break;
1214             case R_ES:
1215                 bytes[0] = 0x06 + (c == 0x04 ? 1 : 0);
1216                 break;
1217             case R_SS:
1218                 bytes[0] = 0x16 + (c == 0x04 ? 1 : 0);
1219                 break;
1220             default:
1221                 errfunc(ERR_PANIC,
1222                         "bizarre 8086 segment register received");
1223             }
1224             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1225             offset++;
1226             break;
1227
1228         case 05:
1229         case 07:
1230             switch (ins->oprs[0].basereg) {
1231             case R_FS:
1232                 bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0);
1233                 break;
1234             case R_GS:
1235                 bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0);
1236                 break;
1237             default:
1238                 errfunc(ERR_PANIC,
1239                         "bizarre 386 segment register received");
1240             }
1241             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1242             offset++;
1243             break;
1244
1245         case 010:
1246         case 011:
1247         case 012:
1248         case 013:
1249             EMIT_REX();
1250             bytes[0] = *codes++ + ((regval(opx)) & 7);
1251             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1252             offset += 1;
1253             break;
1254
1255         case 014:
1256         case 015:
1257         case 016:
1258         case 017:
1259             /* The test for BITS8 and SBYTE here is intended to avoid
1260                warning on optimizer actions due to SBYTE, while still
1261                warning on explicit BYTE directives.  Also warn, obviously,
1262                if the optimizer isn't enabled. */
1263             if (((opx->type & BITS8) ||
1264                  !(opx->type & temp->opd[c & 3] & BYTENESS)) &&
1265                 (opx->offset < -128 || opx->offset > 127)) {
1266                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1267                         "signed byte value exceeds bounds");
1268             }
1269             if (opx->segment != NO_SEG) {
1270                 data = opx->offset;
1271                 out(offset, segment, &data, OUT_ADDRESS, 1,
1272                     opx->segment, opx->wrt);
1273             } else {
1274                 bytes[0] = opx->offset;
1275                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1276                     NO_SEG);
1277             }
1278             offset += 1;
1279             break;
1280
1281         case 020:
1282         case 021:
1283         case 022:
1284         case 023:
1285             if (opx->offset < -256 || opx->offset > 255) {
1286                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1287                         "byte value exceeds bounds");
1288             }
1289             if (opx->segment != NO_SEG) {
1290                 data = opx->offset;
1291                 out(offset, segment, &data, OUT_ADDRESS, 1,
1292                     opx->segment, opx->wrt);
1293             } else {
1294                 bytes[0] = opx->offset;
1295                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1296                     NO_SEG);
1297             }
1298             offset += 1;
1299             break;
1300
1301         case 024:
1302         case 025:
1303         case 026:
1304         case 027:
1305             if (opx->offset < 0 || opx->offset > 255)
1306                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1307                         "unsigned byte value exceeds bounds");
1308             if (opx->segment != NO_SEG) {
1309                 data = opx->offset;
1310                 out(offset, segment, &data, OUT_ADDRESS, 1,
1311                     opx->segment, opx->wrt);
1312             } else {
1313                 bytes[0] = opx->offset;
1314                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1315                     NO_SEG);
1316             }
1317             offset += 1;
1318             break;
1319
1320         case 030:
1321         case 031:
1322         case 032:
1323         case 033:
1324             warn_overflow(2, opx);
1325             data = opx->offset;
1326             out(offset, segment, &data, OUT_ADDRESS, 2,
1327                 opx->segment, opx->wrt);
1328             offset += 2;
1329             break;
1330
1331         case 034:
1332         case 035:
1333         case 036:
1334         case 037:
1335             if (opx->type & (BITS16 | BITS32))
1336                 size = (opx->type & BITS16) ? 2 : 4;
1337             else
1338                 size = (bits == 16) ? 2 : 4;
1339             warn_overflow(size, opx);
1340             data = opx->offset;
1341             out(offset, segment, &data, OUT_ADDRESS, size,
1342                 opx->segment, opx->wrt);
1343             offset += size;
1344             break;
1345
1346         case 040:
1347         case 041:
1348         case 042:
1349         case 043:
1350             warn_overflow(4, opx);
1351             data = opx->offset;
1352             out(offset, segment, &data, OUT_ADDRESS, 4,
1353                 opx->segment, opx->wrt);
1354             offset += 4;
1355             break;
1356
1357         case 044:
1358         case 045:
1359         case 046:
1360         case 047:
1361             data = opx->offset;
1362             size = ins->addr_size >> 3;
1363             warn_overflow(size, opx);
1364             out(offset, segment, &data, OUT_ADDRESS, size,
1365                 opx->segment, opx->wrt);
1366             offset += size;
1367             break;
1368
1369         case 050:
1370         case 051:
1371         case 052:
1372         case 053:
1373             if (opx->segment != segment)
1374                 errfunc(ERR_NONFATAL,
1375                         "short relative jump outside segment");
1376             data = opx->offset - insn_end;
1377             if (data > 127 || data < -128)
1378                 errfunc(ERR_NONFATAL, "short jump is out of range");
1379             bytes[0] = data;
1380             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1381             offset += 1;
1382             break;
1383
1384         case 054:
1385         case 055:
1386         case 056:
1387         case 057:
1388             data = (int64_t)opx->offset;
1389             out(offset, segment, &data, OUT_ADDRESS, 8,
1390                 opx->segment, opx->wrt);
1391             offset += 8;
1392             break;
1393
1394         case 060:
1395         case 061:
1396         case 062:
1397         case 063:
1398             if (opx->segment != segment) {
1399                 data = opx->offset;
1400                 out(offset, segment, &data,
1401                     OUT_REL2ADR, insn_end - offset,
1402                     opx->segment, opx->wrt);
1403             } else {
1404                 data = opx->offset - insn_end;
1405                 out(offset, segment, &data,
1406                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1407             }
1408             offset += 2;
1409             break;
1410
1411         case 064:
1412         case 065:
1413         case 066:
1414         case 067:
1415             if (opx->type & (BITS16 | BITS32 | BITS64))
1416                 size = (opx->type & BITS16) ? 2 : 4;
1417             else
1418                 size = (bits == 16) ? 2 : 4;
1419             if (opx->segment != segment) {
1420                 data = opx->offset;
1421                 out(offset, segment, &data,
1422                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1423                     insn_end - offset, opx->segment, opx->wrt);
1424             } else {
1425                 data = opx->offset - insn_end;
1426                 out(offset, segment, &data,
1427                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1428             }
1429             offset += size;
1430             break;
1431
1432         case 070:
1433         case 071:
1434         case 072:
1435         case 073:
1436             if (opx->segment != segment) {
1437                 data = opx->offset;
1438                 out(offset, segment, &data,
1439                     OUT_REL4ADR, insn_end - offset,
1440                     opx->segment, opx->wrt);
1441             } else {
1442                 data = opx->offset - insn_end;
1443                 out(offset, segment, &data,
1444                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1445             }
1446             offset += 4;
1447             break;
1448
1449         case 074:
1450         case 075:
1451         case 076:
1452         case 077:
1453             if (opx->segment == NO_SEG)
1454                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1455                         " relocatable");
1456             data = 0;
1457             out(offset, segment, &data, OUT_ADDRESS, 2,
1458                 outfmt->segbase(1 + opx->segment),
1459                 opx->wrt);
1460             offset += 2;
1461             break;
1462
1463         case 0140:
1464         case 0141:
1465         case 0142:
1466         case 0143:
1467             data = opx->offset;
1468             warn_overflow(2, opx);
1469             if (is_sbyte16(opx)) {
1470                 bytes[0] = data;
1471                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1472                     NO_SEG);
1473                 offset++;
1474             } else {
1475                 out(offset, segment, &data, OUT_ADDRESS, 2,
1476                     opx->segment, opx->wrt);
1477                 offset += 2;
1478             }
1479             break;
1480
1481         case 0144:
1482         case 0145:
1483         case 0146:
1484         case 0147:
1485             EMIT_REX();
1486             bytes[0] = *codes++;
1487             if (is_sbyte16(opx))
1488                 bytes[0] |= 2;  /* s-bit */
1489             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1490             offset++;
1491             break;
1492
1493         case 0150:
1494         case 0151:
1495         case 0152:
1496         case 0153:
1497             data = opx->offset;
1498             warn_overflow(4, opx);
1499             if (is_sbyte32(opx)) {
1500                 bytes[0] = data;
1501                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1502                     NO_SEG);
1503                 offset++;
1504             } else {
1505                 out(offset, segment, &data, OUT_ADDRESS, 4,
1506                     opx->segment, opx->wrt);
1507                 offset += 4;
1508             }
1509             break;
1510
1511         case 0154:
1512         case 0155:
1513         case 0156:
1514         case 0157:
1515             EMIT_REX();
1516             bytes[0] = *codes++;
1517             if (is_sbyte32(opx))
1518                 bytes[0] |= 2;  /* s-bit */
1519             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1520             offset++;
1521             break;
1522
1523         case 0160:
1524         case 0161:
1525         case 0162:
1526         case 0163:
1527         case 0164:
1528         case 0165:
1529         case 0166:
1530         case 0167:
1531             break;
1532
1533         case 0171:
1534             bytes[0] =
1535                 (ins->drexdst << 4) |
1536                 (ins->rex & REX_OC ? 0x08 : 0) |
1537                 (ins->rex & (REX_R|REX_X|REX_B));
1538             ins->rex = 0;
1539             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1540             offset++;
1541             break;
1542
1543         case 0172:
1544             c = *codes++;
1545             opx = &ins->oprs[c >> 3];
1546             bytes[0] = nasm_regvals[opx->basereg] << 4;
1547             opx = &ins->oprs[c & 7];
1548             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1549                 errfunc(ERR_NONFATAL,
1550                         "non-absolute expression not permitted as argument %d",
1551                         c & 7);
1552             } else {
1553                 if (opx->offset & ~15) {
1554                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1555                             "four-bit argument exceeds bounds");
1556                 }
1557                 bytes[0] |= opx->offset & 15;
1558             }
1559             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1560             offset++;
1561             break;
1562
1563         case 0173:
1564             c = *codes++;
1565             opx = &ins->oprs[c >> 4];
1566             bytes[0] = nasm_regvals[opx->basereg] << 4;
1567             bytes[0] |= c & 15;
1568             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1569             offset++;
1570             break;
1571
1572         case 0174:
1573             c = *codes++;
1574             opx = &ins->oprs[c];
1575             bytes[0] = nasm_regvals[opx->basereg] << 4;
1576             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1577             offset++;
1578             break;
1579
1580         case 0250:
1581         case 0251:
1582         case 0252:
1583         case 0253:
1584             data = opx->offset;
1585             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1586                 (int32_t)data != (int64_t)data) {
1587                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1588                         "signed dword immediate exceeds bounds");
1589             }
1590             if (is_sbyte32(opx)) {
1591                 bytes[0] = data;
1592                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1593                     NO_SEG);
1594                 offset++;
1595             } else {
1596                 out(offset, segment, &data, OUT_ADDRESS, 4,
1597                     opx->segment, opx->wrt);
1598                 offset += 4;
1599             }
1600             break;
1601
1602         case 0254:
1603         case 0255:
1604         case 0256:
1605         case 0257:
1606             data = opx->offset;
1607             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1608                 (int32_t)data != (int64_t)data) {
1609                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1610                         "signed dword immediate exceeds bounds");
1611             }
1612             out(offset, segment, &data, OUT_ADDRESS, 4,
1613                 opx->segment, opx->wrt);
1614             offset += 4;
1615             break;
1616
1617         case 0260:
1618         case 0261:
1619         case 0262:
1620         case 0263:
1621         case 0270:
1622             codes += 2;
1623             if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1624                 bytes[0] = 0xc4;
1625                 bytes[1] = ins->vex_m | ((~ins->rex & 7) << 5);
1626                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1627                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1628                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1629                 offset += 3;
1630             } else {
1631                 bytes[0] = 0xc5;
1632                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1633                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1634                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1635                 offset += 2;
1636             }
1637             break;
1638
1639         case 0274:
1640         case 0275:
1641         case 0276:
1642         case 0277:
1643         {
1644             uint64_t uv, um;
1645             int s;
1646
1647             if (ins->rex & REX_W)
1648                 s = 64;
1649             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1650                 s = 16;
1651             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1652                 s = 32;
1653             else
1654                 s = bits;
1655
1656             um = (uint64_t)2 << (s-1);
1657             uv = opx->offset;
1658
1659             if (uv > 127 && uv < (uint64_t)-128 &&
1660                 (uv < um-128 || uv > um-1)) {
1661                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1662                         "signed byte value exceeds bounds");
1663             }
1664             if (opx->segment != NO_SEG) {
1665                 data = um;
1666                 out(offset, segment, &data, OUT_ADDRESS, 1,
1667                     opx->segment, opx->wrt);
1668             } else {
1669                 bytes[0] = um;
1670                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1671                     NO_SEG);
1672             }
1673             offset += 1;
1674             break;
1675         }
1676
1677         case 0300:
1678         case 0301:
1679         case 0302:
1680         case 0303:
1681             break;
1682
1683         case 0310:
1684             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1685                 *bytes = 0x67;
1686                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1687                 offset += 1;
1688             } else
1689                 offset += 0;
1690             break;
1691
1692         case 0311:
1693             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1694                 *bytes = 0x67;
1695                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1696                 offset += 1;
1697             } else
1698                 offset += 0;
1699             break;
1700
1701         case 0312:
1702             break;
1703
1704         case 0313:
1705             ins->rex = 0;
1706             break;
1707
1708         case 0314:
1709         case 0315:
1710         case 0316:
1711         case 0317:
1712             break;
1713
1714         case 0320:
1715             if (bits != 16) {
1716                 *bytes = 0x66;
1717                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1718                 offset += 1;
1719             } else
1720                 offset += 0;
1721             break;
1722
1723         case 0321:
1724             if (bits == 16) {
1725                 *bytes = 0x66;
1726                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1727                 offset += 1;
1728             } else
1729                 offset += 0;
1730             break;
1731
1732         case 0322:
1733         case 0323:
1734             break;
1735
1736         case 0324:
1737             ins->rex |= REX_W;
1738             break;
1739
1740         case 0330:
1741             *bytes = *codes++ ^ condval[ins->condition];
1742             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1743             offset += 1;
1744             break;
1745
1746         case 0331:
1747             break;
1748
1749         case 0332:
1750         case 0333:
1751             *bytes = c - 0332 + 0xF2;
1752             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1753             offset += 1;
1754             break;
1755
1756         case 0334:
1757             if (ins->rex & REX_R) {
1758                 *bytes = 0xF0;
1759                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1760                 offset += 1;
1761             }
1762             ins->rex &= ~(REX_L|REX_R);
1763             break;
1764
1765         case 0335:
1766             break;
1767
1768         case 0336:
1769         case 0337:
1770             break;
1771
1772         case 0340:
1773             if (ins->oprs[0].segment != NO_SEG)
1774                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1775             else {
1776                 int64_t size = ins->oprs[0].offset;
1777                 if (size > 0)
1778                     out(offset, segment, NULL,
1779                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1780                 offset += size;
1781             }
1782             break;
1783
1784         case 0360:
1785             break;
1786
1787         case 0361:
1788             bytes[0] = 0x66;
1789             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1790             offset += 1;
1791             break;
1792
1793         case 0362:
1794         case 0363:
1795             bytes[0] = c - 0362 + 0xf2;
1796             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1797             offset += 1;
1798             break;
1799
1800         case 0364:
1801         case 0365:
1802             break;
1803
1804         case 0366:
1805         case 0367:
1806             *bytes = c - 0366 + 0x66;
1807             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1808             offset += 1;
1809             break;
1810
1811         case 0370:
1812         case 0371:
1813         case 0372:
1814             break;
1815
1816         case 0373:
1817             *bytes = bits == 16 ? 3 : 5;
1818             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1819             offset += 1;
1820             break;
1821
1822         default:               /* can't do it by 'case' statements */
1823             if (c >= 0100 && c <= 0277) {       /* it's an EA */
1824                 ea ea_data;
1825                 int rfield;
1826                 int32_t rflags;
1827                 uint8_t *p;
1828                 int32_t s;
1829                 enum out_type type;
1830
1831                 if (c <= 0177) {
1832                     /* pick rfield from operand b */
1833                     rflags = regflag(&ins->oprs[c & 7]);
1834                     rfield = nasm_regvals[ins->oprs[c & 7].basereg];
1835                 } else {
1836                     /* rfield is constant */
1837                     rflags = 0;
1838                     rfield = c & 7;
1839                 }
1840
1841                 if (!process_ea
1842                     (&ins->oprs[(c >> 3) & 7], &ea_data, bits,
1843                      ins->addr_size, rfield, rflags)) {
1844                     errfunc(ERR_NONFATAL, "invalid effective address");
1845                 }
1846
1847
1848                 p = bytes;
1849                 *p++ = ea_data.modrm;
1850                 if (ea_data.sib_present)
1851                     *p++ = ea_data.sib;
1852
1853                 /* DREX suffixes come between the SIB and the displacement */
1854                 if (ins->rex & REX_D) {
1855                     *p++ =
1856                         (ins->drexdst << 4) |
1857                         (ins->rex & REX_OC ? 0x08 : 0) |
1858                         (ins->rex & (REX_R|REX_X|REX_B));
1859                     ins->rex = 0;
1860                 }
1861
1862                 s = p - bytes;
1863                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1864
1865                 /*
1866                  * Make sure the address gets the right offset in case
1867                  * the line breaks in the .lst file (BR 1197827)
1868                  */
1869                 offset += s;
1870                 s = 0;
1871
1872                 switch (ea_data.bytes) {
1873                 case 0:
1874                     break;
1875                 case 1:
1876                     if (ins->oprs[(c >> 3) & 7].segment != NO_SEG) {
1877                         data = ins->oprs[(c >> 3) & 7].offset;
1878                         out(offset, segment, &data, OUT_ADDRESS, 1,
1879                             ins->oprs[(c >> 3) & 7].segment,
1880                             ins->oprs[(c >> 3) & 7].wrt);
1881                     } else {
1882                         *bytes = ins->oprs[(c >> 3) & 7].offset;
1883                         out(offset, segment, bytes, OUT_RAWDATA, 1,
1884                             NO_SEG, NO_SEG);
1885                     }
1886                     s++;
1887                     break;
1888                 case 8:
1889                 case 2:
1890                 case 4:
1891                     data = ins->oprs[(c >> 3) & 7].offset;
1892                     warn_overflow(ea_data.bytes, opx);
1893                     s += ea_data.bytes;
1894                     if (ea_data.rip) {
1895                         data -= insn_end - (offset+ea_data.bytes);
1896                         type = OUT_REL4ADR;
1897                     } else {
1898                         type = OUT_ADDRESS;
1899                     }
1900                     out(offset, segment, &data, type, ea_data.bytes,
1901                         ins->oprs[(c >> 3) & 7].segment,
1902                         ins->oprs[(c >> 3) & 7].wrt);
1903                     break;
1904                 }
1905                 offset += s;
1906             } else {
1907                 errfunc(ERR_PANIC, "internal instruction table corrupt"
1908                         ": instruction code 0x%02X given", c);
1909             }
1910         }
1911     }
1912 }
1913
1914 static int32_t regflag(const operand * o)
1915 {
1916     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1917         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1918     }
1919     return nasm_reg_flags[o->basereg];
1920 }
1921
1922 static int32_t regval(const operand * o)
1923 {
1924     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1925         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1926     }
1927     return nasm_regvals[o->basereg];
1928 }
1929
1930 static int op_rexflags(const operand * o, int mask)
1931 {
1932     int32_t flags;
1933     int val;
1934
1935     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1936         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1937     }
1938
1939     flags = nasm_reg_flags[o->basereg];
1940     val = nasm_regvals[o->basereg];
1941
1942     return rexflags(val, flags, mask);
1943 }
1944
1945 static int rexflags(int val, int32_t flags, int mask)
1946 {
1947     int rex = 0;
1948
1949     if (val >= 8)
1950         rex |= REX_B|REX_X|REX_R;
1951     if (flags & BITS64)
1952         rex |= REX_W;
1953     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1954         rex |= REX_H;
1955     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1956         rex |= REX_P;
1957
1958     return rex & mask;
1959 }
1960
1961 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1962 {
1963     int i, size[MAX_OPERANDS], asize, oprs, ret;
1964
1965     ret = 100;
1966
1967     /*
1968      * Check the opcode
1969      */
1970     if (itemp->opcode != instruction->opcode)
1971         return 0;
1972
1973     /*
1974      * Count the operands
1975      */
1976     if (itemp->operands != instruction->operands)
1977         return 0;
1978
1979     /*
1980      * Check that no spurious colons or TOs are present
1981      */
1982     for (i = 0; i < itemp->operands; i++)
1983         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1984             return 0;
1985
1986     /*
1987      * Process size flags
1988      */
1989     if (itemp->flags & IF_ARMASK) {
1990         memset(size, 0, sizeof size);
1991
1992         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
1993
1994         switch (itemp->flags & IF_SMASK) {
1995         case IF_SB:
1996             size[i] = BITS8;
1997             break;
1998         case IF_SW:
1999             size[i] = BITS16;
2000             break;
2001         case IF_SD:
2002             size[i] = BITS32;
2003             break;
2004         case IF_SQ:
2005             size[i] = BITS64;
2006             break;
2007         case IF_SO:
2008             size[i] = BITS128;
2009             break;
2010         case IF_SY:
2011             size[i] = BITS256;
2012             break;
2013         case IF_SZ:
2014             switch (bits) {
2015             case 16:
2016                 size[i] = BITS16;
2017                 break;
2018             case 32:
2019                 size[i] = BITS32;
2020                 break;
2021             case 64:
2022                 size[i] = BITS64;
2023                 break;
2024             }
2025             break;
2026         default:
2027             break;
2028         }
2029     } else {
2030         asize = 0;
2031         switch (itemp->flags & IF_SMASK) {
2032         case IF_SB:
2033             asize = BITS8;
2034             break;
2035         case IF_SW:
2036             asize = BITS16;
2037             break;
2038         case IF_SD:
2039             asize = BITS32;
2040             break;
2041         case IF_SQ:
2042             asize = BITS64;
2043             break;
2044         case IF_SO:
2045             asize = BITS128;
2046             break;
2047         case IF_SY:
2048             asize = BITS256;
2049             break;
2050         case IF_SZ:
2051             switch (bits) {
2052             case 16:
2053                 asize = BITS16;
2054                 break;
2055             case 32:
2056                 asize = BITS32;
2057                 break;
2058             case 64:
2059                 asize = BITS64;
2060                 break;
2061             }
2062             break;
2063         default:
2064             break;
2065         }
2066         for (i = 0; i < MAX_OPERANDS; i++)
2067             size[i] = asize;
2068     }
2069
2070     /*
2071      * Check that the operand flags all match up
2072      */
2073     for (i = 0; i < itemp->operands; i++) {
2074         int32_t type = instruction->oprs[i].type;
2075         if (!(type & SIZE_MASK))
2076             type |= size[i];
2077
2078         if (itemp->opd[i] & SAME_AS) {
2079             int j = itemp->opd[i] & ~SAME_AS;
2080             if (type != instruction->oprs[j].type ||
2081                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2082                 return 0;
2083         } else if (itemp->opd[i] & ~type ||
2084             ((itemp->opd[i] & SIZE_MASK) &&
2085              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2086             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2087                 (type & SIZE_MASK))
2088                 return 0;
2089             else
2090                 return 1;
2091         }
2092     }
2093
2094     /*
2095      * Check operand sizes
2096      */
2097     if (itemp->flags & (IF_SM | IF_SM2)) {
2098         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2099         asize = 0;
2100         for (i = 0; i < oprs; i++) {
2101             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2102                 int j;
2103                 for (j = 0; j < oprs; j++)
2104                     size[j] = asize;
2105                 break;
2106             }
2107         }
2108     } else {
2109         oprs = itemp->operands;
2110     }
2111
2112     for (i = 0; i < itemp->operands; i++) {
2113         if (!(itemp->opd[i] & SIZE_MASK) &&
2114             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2115             return 2;
2116     }
2117
2118     /*
2119      * Check template is okay at the set cpu level
2120      */
2121     if (((itemp->flags & IF_PLEVEL) > cpu))
2122         return 3;
2123
2124     /*
2125      * Check if instruction is available in long mode
2126      */
2127     if ((itemp->flags & IF_NOLONG) && (bits == 64))
2128         return 4;
2129
2130     /*
2131      * Check if special handling needed for Jumps
2132      */
2133     if ((uint8_t)(itemp->code[0]) >= 0370)
2134         return 99;
2135
2136     return ret;
2137 }
2138
/*
 * process_ea: work out the ModRM byte, optional SIB byte and the
 * displacement length needed to encode operand `input' as an
 * effective address, and store the result in `*output'.
 *
 *   input    - the operand: either a register (REGISTER type) or a
 *              memory reference (base/index/scale/offset)
 *   output   - receives rip, sib_present, sib (only when sib_present),
 *              bytes (displacement length), modrm and size.
 *              output->rex is only OR-ed into, never cleared here.
 *   bits     - current assembly mode; bits == 64 selects the long-mode
 *              forms for pure offsets
 *   addrbits - address size of the instruction
 *   rfield   - value placed in bits 3..5 of the ModRM byte
 *   rflags   - register flags belonging to rfield, used together with
 *              rfield to compute REX flags
 *
 * Returns `output' on success, or NULL when the operand cannot be
 * encoded.  On a NULL return output->rip and output->rex may already
 * have been modified.
 */
2139 static ea *process_ea(operand * input, ea * output, int bits,
2140                       int addrbits, int rfield, int32_t rflags)
2141 {
         /* A forward reference is never given the mod=0 form, and gets
            the byte-displacement form only if EAF_BYTEOFFS is set */
2142     bool forw_ref = !!(input->opflags & OPFLAG_FORWARD);
2143
2144     output->rip = false;
2145
2146     /* REX flags for the rfield operand */
2147     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2148
2149     if (!(REGISTER & ~input->type)) {   /* register direct */
2150         int i;
2151         int32_t f;
2152
2153         if (input->basereg < EXPR_REG_START /* Verify as Register */
2154             || input->basereg >= REG_ENUM_LIMIT)
2155             return NULL;
2156         f = regflag(input);
2157         i = nasm_regvals[input->basereg];
2158
2159         if (REG_EA & ~f)
2160             return NULL;        /* Invalid EA register */
2161
2162         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2163
2164         output->sib_present = false;             /* no SIB necessary */
2165         output->bytes = 0;  /* no offset necessary either */
2166         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2167     } else {                    /* it's a memory reference */
2168         if (input->basereg == -1
2169             && (input->indexreg == -1 || input->scale == 0)) {
2170             /* it's a pure offset */
2171             if (bits == 64 && (~input->type & IP_REL)) {
                   /* 64-bit mode without IP_REL: ModRM rm=4 plus a SIB
                      byte with index=4, base=5 and a 4-byte offset */
2172               int scale, index, base;
2173               output->sib_present = true;
2174               scale = 0;
2175               index = 4;
2176               base = 5;
2177               output->sib = (scale << 6) | (index << 3) | base;
2178               output->bytes = 4;
2179               output->modrm = 4 | ((rfield & 7) << 3);
2180               output->rip = false;
2181             } else {
                   /* offset-only ModRM: rm=6 with 2 bytes for 16-bit
                      addressing, rm=5 with 4 bytes otherwise; flagged
                      as RIP-relative when assembling in 64-bit mode */
2182               output->sib_present = false;
2183               output->bytes = (addrbits != 16 ? 4 : 2);
2184               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2185               output->rip = bits == 64;
2186             }
2187         } else {                /* it's an indirection */
2188             int i = input->indexreg, b = input->basereg, s = input->scale;
2189             int32_t o = input->offset, seg = input->segment;
2190             int hb = input->hintbase, ht = input->hinttype;
2191             int t;
2192             int it, bt;
2193             int32_t ix, bx;     /* register flags */
2194
2195             if (s == 0)
2196                 i = -1;         /* make this easy, at least */
2197
                 /* it/ix and bt/bx: register value and flags of the index
                    and base register, or -1/0 when there is none */
2198             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2199                 it = nasm_regvals[i];
2200                 ix = nasm_reg_flags[i];
2201             } else {
2202                 it = -1;
2203                 ix = 0;
2204             }
2205
2206             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2207                 bt = nasm_regvals[b];
2208                 bx = nasm_reg_flags[b];
2209             } else {
2210                 bt = -1;
2211                 bx = 0;
2212             }
2213
2214             /* check for a 32/64-bit memory reference... */
2215             if ((ix|bx) & (BITS32|BITS64)) {
2216                 /* it must be a 32/64-bit memory reference. Firstly we have
2217                  * to check that all registers involved are type E/Rxx. */
2218                 int32_t sok = BITS32|BITS64;
2219
2220                 if (it != -1) {
2221                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2222                         sok &= ix;
2223                     else
2224                         return NULL;
2225                 }
2226
2227                 if (bt != -1) {
2228                     if (REG_GPR & ~bx)
2229                         return NULL; /* Invalid register */
2230                     if (~sok & bx & SIZE_MASK)
2231                         return NULL; /* Invalid size */
2232                     sok &= bx;
2233                 }
2234
2235                 /* While we're here, ensure the user didn't specify
2236                    WORD or QWORD. */
2237                 if (input->disp_size == 16 || input->disp_size == 64)
2238                     return NULL;
2239
                     /* the register sizes left in sok must include the
                        address size in use */
2240                 if (addrbits == 16 ||
2241                     (addrbits == 32 && !(sok & BITS32)) ||
2242                     (addrbits == 64 && !(sok & BITS64)))
2243                     return NULL;
2244
2245                 /* now reorganize base/index */
2246                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2247                     ((hb == b && ht == EAH_NOTBASE)
2248                      || (hb == i && ht == EAH_MAKEBASE))) {
2249                     /* swap if hints say so */
2250                     t = bt, bt = it, it = t;
2251                     t = bx, bx = ix, ix = t;
2252                 }
2253                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2254                     bt = -1, bx = 0, s++;
2255                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2256                     /* make single reg base, unless hint */
2257                     bt = it, bx = ix, it = -1, ix = 0;
2258                 }
2259                 if (((s == 2 && it != REG_NUM_ESP
2260                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2261                      || s == 5 || s == 9) && bt == -1)
2262                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2263                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2264                     && (input->eaflags & EAF_TIMESTWO))
2265                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2266                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2267                 if (s == 1 && it == REG_NUM_ESP) {
2268                     /* swap ESP into base if scale is 1 */
2269                     t = it, it = bt, bt = t;
2270                     t = ix, ix = bx, bx = t;
2271                 }
2272                 if (it == REG_NUM_ESP
2273                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2274                     return NULL;        /* wrong, for various reasons */
2275
2276                 output->rex |= rexflags(it, ix, REX_X);
2277                 output->rex |= rexflags(bt, bx, REX_B);
2278
2279                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2280                     /* no SIB needed */
2281                     int mod, rm;
2282
2283                     if (bt == -1) {
2284                         rm = 5;
2285                         mod = 0;
2286                     } else {
2287                         rm = (bt & 7);
2288                         if (rm != REG_NUM_EBP && o == 0 &&
2289                                 seg == NO_SEG && !forw_ref &&
2290                                 !(input->eaflags &
2291                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2292                             mod = 0;
2293                         else if (input->eaflags & EAF_BYTEOFFS ||
2294                                  (o >= -128 && o <= 127 && seg == NO_SEG
2295                                   && !forw_ref
2296                                   && !(input->eaflags & EAF_WORDOFFS)))
2297                             mod = 1;
2298                         else
2299                             mod = 2;
2300                     }
2301
2302                     output->sib_present = false;
                         /* mod 0 and 1 double as the offset length; mod 2
                            and the base-less form take a 4-byte offset */
2303                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2304                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2305                 } else {
2306                     /* we need a SIB */
2307                     int mod, scale, index, base;
2308
2309                     if (it == -1)
2310                         index = 4, s = 1;
2311                     else
2312                         index = (it & 7);
2313
                         /* the SIB scale field holds log2 of the multiplier */
2314                     switch (s) {
2315                     case 1:
2316                         scale = 0;
2317                         break;
2318                     case 2:
2319                         scale = 1;
2320                         break;
2321                     case 4:
2322                         scale = 2;
2323                         break;
2324                     case 8:
2325                         scale = 3;
2326                         break;
2327                     default:   /* then what the smeg is it? */
2328                         return NULL;    /* panic */
2329                     }
2330
2331                     if (bt == -1) {
2332                         base = 5;
2333                         mod = 0;
2334                     } else {
2335                         base = (bt & 7);
2336                         if (base != REG_NUM_EBP && o == 0 &&
2337                                     seg == NO_SEG && !forw_ref &&
2338                                     !(input->eaflags &
2339                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2340                             mod = 0;
2341                         else if (input->eaflags & EAF_BYTEOFFS ||
2342                                  (o >= -128 && o <= 127 && seg == NO_SEG
2343                                   && !forw_ref
2344                                   && !(input->eaflags & EAF_WORDOFFS)))
2345                             mod = 1;
2346                         else
2347                             mod = 2;
2348                     }
2349
2350                     output->sib_present = true;
2351                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2352                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2353                     output->sib = (scale << 6) | (index << 3) | base;
2354                 }
2355             } else {            /* it's 16-bit */
2356                 int mod, rm;
2357
2358                 /* check for 64-bit long mode */
2359                 if (addrbits == 64)
2360                     return NULL;
2361
2362                 /* check all registers are BX, BP, SI or DI */
2363                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2364                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2365                                        && i != R_SI && i != R_DI))
2366                     return NULL;
2367
2368                 /* ensure the user didn't specify DWORD/QWORD */
2369                 if (input->disp_size == 32 || input->disp_size == 64)
2370                     return NULL;
2371
2372                 if (s != 1 && i != -1)
2373                     return NULL;        /* no can do, in 16-bit EA */
2374                 if (b == -1 && i != -1) {
2375                     int tmp = b;
2376                     b = i;
2377                     i = tmp;
2378                 }               /* swap */
2379                 if ((b == R_SI || b == R_DI) && i != -1) {
2380                     int tmp = b;
2381                     b = i;
2382                     i = tmp;
2383                 }
2384                 /* have BX/BP as base, SI/DI index */
2385                 if (b == i)
2386                     return NULL;        /* shouldn't ever happen, in theory */
2387                 if (i != -1 && b != -1 &&
2388                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2389                     return NULL;        /* invalid combinations */
2390                 if (b == -1)    /* pure offset: handled above */
2391                     return NULL;        /* so if it gets to here, panic! */
2392
2393                 rm = -1;
                     /* with an index, index and base are folded into a
                        single switch key; otherwise switch on base alone */
2394                 if (i != -1)
2395                     switch (i * 256 + b) {
2396                     case R_SI * 256 + R_BX:
2397                         rm = 0;
2398                         break;
2399                     case R_DI * 256 + R_BX:
2400                         rm = 1;
2401                         break;
2402                     case R_SI * 256 + R_BP:
2403                         rm = 2;
2404                         break;
2405                     case R_DI * 256 + R_BP:
2406                         rm = 3;
2407                         break;
2408                 } else
2409                     switch (b) {
2410                     case R_SI:
2411                         rm = 4;
2412                         break;
2413                     case R_DI:
2414                         rm = 5;
2415                         break;
2416                     case R_BP:
2417                         rm = 6;
2418                         break;
2419                     case R_BX:
2420                         rm = 7;
2421                         break;
2422                     }
2423                 if (rm == -1)   /* can't happen, in theory */
2424                     return NULL;        /* so panic if it does */
2425
                     /* rm == 6 (BP alone) never takes the mod=0 form */
2426                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2427                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2428                     mod = 0;
2429                 else if (input->eaflags & EAF_BYTEOFFS ||
2430                          (o >= -128 && o <= 127 && seg == NO_SEG
2431                           && !forw_ref
2432                           && !(input->eaflags & EAF_WORDOFFS)))
2433                     mod = 1;
2434                 else
2435                     mod = 2;
2436
2437                 output->sib_present = false;    /* no SIB - it's 16-bit */
2438                 output->bytes = mod;    /* bytes of offset needed */
2439                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2440             }
2441         }
2442     }
2443
         /* total length: ModRM byte + optional SIB byte + offset bytes */
2444     output->size = 1 + output->sib_present + output->bytes;
2445     return output;
2446 }
2447
2448 static void add_asp(insn *ins, int addrbits)
2449 {
2450     int j, valid;
2451     int defdisp;
2452
2453     valid = (addrbits == 64) ? 64|32 : 32|16;
2454
2455     switch (ins->prefixes[PPS_ASIZE]) {
2456     case P_A16:
2457         valid &= 16;
2458         break;
2459     case P_A32:
2460         valid &= 32;
2461         break;
2462     case P_A64:
2463         valid &= 64;
2464         break;
2465     case P_ASP:
2466         valid &= (addrbits == 32) ? 16 : 32;
2467         break;
2468     default:
2469         break;
2470     }
2471
2472     for (j = 0; j < ins->operands; j++) {
2473         if (!(MEMORY & ~ins->oprs[j].type)) {
2474             int32_t i, b;
2475
2476             /* Verify as Register */
2477             if (ins->oprs[j].indexreg < EXPR_REG_START
2478                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2479                 i = 0;
2480             else
2481                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2482
2483             /* Verify as Register */
2484             if (ins->oprs[j].basereg < EXPR_REG_START
2485                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2486                 b = 0;
2487             else
2488                 b = nasm_reg_flags[ins->oprs[j].basereg];
2489
2490             if (ins->oprs[j].scale == 0)
2491                 i = 0;
2492
2493             if (!i && !b) {
2494                 int ds = ins->oprs[j].disp_size;
2495                 if ((addrbits != 64 && ds > 8) ||
2496                     (addrbits == 64 && ds == 16))
2497                     valid &= ds;
2498             } else {
2499                 if (!(REG16 & ~b))
2500                     valid &= 16;
2501                 if (!(REG32 & ~b))
2502                     valid &= 32;
2503                 if (!(REG64 & ~b))
2504                     valid &= 64;
2505
2506                 if (!(REG16 & ~i))
2507                     valid &= 16;
2508                 if (!(REG32 & ~i))
2509                     valid &= 32;
2510                 if (!(REG64 & ~i))
2511                     valid &= 64;
2512             }
2513         }
2514     }
2515
2516     if (valid & addrbits) {
2517         ins->addr_size = addrbits;
2518     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2519         /* Add an address size prefix */
2520         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2521         ins->prefixes[PPS_ASIZE] = pref;
2522         ins->addr_size = (addrbits == 32) ? 16 : 32;
2523     } else {
2524         /* Impossible... */
2525         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2526         ins->addr_size = addrbits; /* Error recovery */
2527     }
2528
2529     defdisp = ins->addr_size == 16 ? 16 : 32;
2530
2531     for (j = 0; j < ins->operands; j++) {
2532         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2533             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2534             != ins->addr_size) {
2535             /* mem_offs sizes must match the address size; if not,
2536                strip the MEM_OFFS bit and match only EA instructions */
2537             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2538         }
2539     }
2540 }