assemble.c

   1 /* assemble.c   code generation for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the license given in the file "LICENSE"
   6  * distributed in the NASM archive.
   7  *
   8  * the actual codes (C syntax, i.e. octal):
   9  * \0            - terminates the code. (Unless it's a literal of course.)
  10  * \1, \2, \3    - that many literal bytes follow in the code stream
  11  * \10..\13      - a literal byte follows in the code stream, to be added
  12  *                 to the register value of operand 0..3
  13  * \14..\17      - a signed byte immediate operand, from operand 0..3
  14  * \20..\23      - a byte immediate operand, from operand 0..3
  15  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  16  * \30..\33      - a word immediate operand, from operand 0..3
  17  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  18  *                 assembly mode or the operand-size override on the operand
  19  * \40..\43      - a long immediate operand, from operand 0..3
  20  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  21  *                 depending on the address size of the instruction.
  22  * \50..\53      - a byte relative operand, from operand 0..3
  23  * \54..\57      - a qword immediate operand, from operand 0..3
  24  * \60..\63      - a word relative operand, from operand 0..3
  25  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  26  *                 assembly mode or the operand-size override on the operand
  27  * \70..\73      - a long relative operand, from operand 0..3
  28  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  29  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  30  *                 field the register value of operand b.
  31  * \140..\143    - an immediate word or signed byte for operand 0..3
  32  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  33  *                  is a signed byte rather than a word.  Opcode byte follows.
  34  * \150..\153    - an immediate dword or signed byte for operand 0..3
  35  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  36  *                  is a signed byte rather than a dword.  Opcode byte follows.
  37  * \160..\163    - this instruction uses DREX rather than REX, with the
  38  *                 OC0 field set to 0, and the dest field taken from
  39  *                 operand 0..3.
  40  * \164..\167    - this instruction uses DREX rather than REX, with the
  41  *                 OC0 field set to 1, and the dest field taken from
  42  *                 operand 0..3.
  43  * \171          - placement of DREX suffix in the absence of an EA
  44  * \172\ab       - the register number from operand a in bits 7..4, with
  45  *                 the 4-bit immediate from operand b in bits 3..0.
  46  * \173\xab      - the register number from operand a in bits 7..4, with
  47  *                 the value b in bits 3..0.
  48  * \174\a        - the register number from operand a in bits 7..4, and
  49  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  50  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  51  *                 field equal to digit b.
  52  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  53  *                 is not equal to the truncated and sign-extended 32-bit
  54  *                 operand; used for 32-bit immediates in 64-bit mode.
  55  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  56  * \260..\263    - this instruction uses VEX rather than REX, with the
  57  *                 V field taken from operand 0..3.
  58  * \270          - this instruction uses VEX rather than REX, with the
  59  *                 V field set to 1111b.
  60  *
  61  * VEX prefixes are followed by the sequence:
  62  * \mm\wlp         where mm is the M field; and wlp is:
  63  *                 00 0ww lpp
  64  *                 [w0] ww = 0 for W = 0
  65  *                 [w1] ww = 1 for W = 1
  66  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  67  *                 [ww] ww = 3 for W used as REX.W
  68  *
  69  *
  70  * \274..\277    - a signed byte immediate operand, from operand 0..3,
  71  *                 which is to be extended to the operand size.
  72  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  73  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  74  * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
  75  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  76  * \314          - (disassembler only) invalid with REX.B
  77  * \315          - (disassembler only) invalid with REX.X
  78  * \316          - (disassembler only) invalid with REX.R
  79  * \317          - (disassembler only) invalid with REX.W
  80  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
  81  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
  82  * \322          - indicates that this instruction is only valid when the
  83  *                 operand size is the default (instruction to disassembler,
  84  *                 generates no code in the assembler)
  85  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
  86  * \324          - indicates 64-bit operand size requiring REX prefix.
  87  * \330          - a literal byte follows in the code stream, to be added
  88  *                 to the condition code value of the instruction.
  89  * \331          - instruction not valid with REP prefix.  Hint for
  90  *                 disassembler only; for SSE instructions.
  91  * \332          - REPNE prefix (0xF2 byte) used as opcode extension.
  92  * \333          - REP prefix (0xF3 byte) used as opcode extension.
  93  * \334          - LOCK prefix used instead of REX.R
  94  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  95  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  96  * \337          - force a REPNE prefix (0xF2) even if not specified.
  97  *                 \336-\337 are still listed as prefixes in the disassembler.
  98  * \340          - reserve <operand 0> bytes of uninitialized storage.
  99  *                 Operand 0 had better be a segmentless constant.
 100  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 101  *                 (POP is never used for CS) depending on operand 0
 102  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 103  *                 on operand 0
 104  * \360          - no SSE prefix (== \364\331)
 105  * \361          - 66 SSE prefix (== \366\331)
 106  * \362          - F2 SSE prefix (== \364\332)
 107  * \363          - F3 SSE prefix (== \364\333)
 108  * \364          - operand-size prefix (0x66) not permitted
 109  * \365          - address-size prefix (0x67) not permitted
 110  * \366          - operand-size prefix (0x66) used as opcode extension
 111  * \367          - address-size prefix (0x67) used as opcode extension
 112  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 113  *                 370 is used for Jcc, 371 is used for JMP.
 114  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 115  *                 used for conditional jump over longer jump
 116  */
 117
 118 #include "compiler.h"
 119
 120 #include <stdio.h>
 121 #include <string.h>
 122 #include <inttypes.h>
 123
 124 #include "nasm.h"
 125 #include "nasmlib.h"
 126 #include "assemble.h"
 127 #include "insns.h"
 128 #include "tables.h"
 129
/*
 * Read-only block of zero bytes (objects with static storage duration
 * are initialized to zero by the C standard).  assemble() uses it as
 * the data source when padding a Dx string operand up to a multiple of
 * the element size.
 */
static const uint8_t const_zero_buf[256];
 132
 133 typedef struct {
 134     int sib_present;                 /* is a SIB byte necessary? */
 135     int bytes;                       /* # of bytes of offset needed */
 136     int size;                        /* lazy - this is sib+bytes+1 */
 137     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 138 } ea;
 139
/*
 * File-scope state shared by the routines in this file.  assemble()
 * sets all four on entry; insn_size() sets only cpu and errfunc.
 */
static uint32_t cpu;            /* cpu level received from nasm.c */
static efunc errfunc;           /* error-reporting callback supplied by the caller */
static struct ofmt *outfmt;     /* output format driver that out() writes through */
static ListGen *list;           /* listing generator; out() passes it a copy of the data */
 144
/*
 * Forward declarations of file-local helpers.
 *
 * calcsize() returns the encoded length for a template's code string;
 * its callers treat a negative result as an error.  matches() returns
 * 100 for a full match and 99 for a match that additionally depends on
 * jmp_match(); assemble() uses smaller positive results to choose an
 * error message.  The remaining helpers are not defined in the portion
 * of the file documented here.
 */
static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
static void gencode(int32_t segment, int64_t offset, int bits,
                    insn * ins, const struct itemplate *temp,
                    int64_t insn_end);
static int matches(const struct itemplate *, insn *, int bits);
static int32_t regflag(const operand *);
static int32_t regval(const operand *);
static int rexflags(int, int32_t, int);
static int op_rexflags(const operand *, int);
static ea *process_ea(operand *, ea *, int, int, int, int32_t);
static void add_asp(insn *, int);
 156
 157 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 158 {
 159     return ins->prefixes[pos] == prefix;
 160 }
 161
 162 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 163 {
 164     if (ins->prefixes[pos])
 165         errfunc(ERR_NONFATAL, "invalid %s prefix",
 166                 prefix_name(ins->prefixes[pos]));
 167 }
 168
 169 static const char *size_name(int size)
 170 {
 171     switch (size) {
 172     case 1:
 173         return "byte";
 174     case 2:
 175         return "word";
 176     case 4:
 177         return "dword";
 178     case 8:
 179         return "qword";
 180     case 10:
 181         return "tword";
 182     case 16:
 183         return "oword";
 184     case 32:
 185         return "yword";
 186     default:
 187         return "???";
 188     }
 189 }
 190
 191 static void warn_overflow(int size, const struct operand *o)
 192 {
 193     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 194         int64_t lim = ((int64_t)1 << (size*8))-1;
 195         int64_t data = o->offset;
 196
 197         if (data < ~lim || data > lim)
 198             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 199                     "%s data exceeds bounds", size_name(size));
 200     }
 201 }
 202 /*
 203  * This routine wrappers the real output format's output routine,
 204  * in order to pass a copy of the data off to the listing file
 205  * generator at the same time.
 206  */
 207 static void out(int64_t offset, int32_t segto, const void *data,
 208                 enum out_type type, uint64_t size,
 209                 int32_t segment, int32_t wrt)
 210 {
 211     static int32_t lineno = 0;     /* static!!! */
 212     static char *lnfname = NULL;
 213     uint8_t p[8];
 214
 215     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 216         /*
 217          * This is a non-relocated address, and we're going to
 218          * convert it into RAWDATA format.
 219          */
 220         uint8_t *q = p;
 221
 222         if (size > 8) {
 223             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 224             return;
 225         }
 226
 227         WRITEADDR(q, *(int64_t *)data, size);
 228         data = p;
 229         type = OUT_RAWDATA;
 230     }
 231
 232     list->output(offset, data, type, size);
 233
 234     /*
 235      * this call to src_get determines when we call the
 236      * debug-format-specific "linenum" function
 237      * it updates lineno and lnfname to the current values
 238      * returning 0 if "same as last time", -2 if lnfname
 239      * changed, and the amount by which lineno changed,
 240      * if it did. thus, these variables must be static
 241      */
 242
 243     if (src_get(&lineno, &lnfname)) {
 244         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 245     }
 246
 247     outfmt->output(segto, data, type, size, segment, wrt);
 248 }
 249
 250 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 251                      insn * ins, const uint8_t *code)
 252 {
 253     int64_t isize;
 254     uint8_t c = code[0];
 255
 256     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 257         return false;
 258     if (!optimizing)
 259         return false;
 260     if (optimizing < 0 && c == 0371)
 261         return false;
 262
 263     isize = calcsize(segment, offset, bits, ins, code);
 264     if (ins->oprs[0].segment != segment)
 265         return false;
 266
 267     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 268     return (isize >= -128 && isize <= 127); /* is it byte size? */
 269 }
 270
 271 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 272               insn * instruction, struct ofmt *output, efunc error,
 273               ListGen * listgen)
 274 {
 275     const struct itemplate *temp;
 276     int j;
 277     int size_prob;
 278     int64_t insn_end;
 279     int32_t itimes;
 280     int64_t start = offset;
 281     int64_t wsize = 0;             /* size for DB etc. */
 282
 283     errfunc = error;            /* to pass to other functions */
 284     cpu = cp;
 285     outfmt = output;            /* likewise */
 286     list = listgen;             /* and again */
 287
 288     switch (instruction->opcode) {
 289     case -1:
 290         return 0;
 291     case I_DB:
 292         wsize = 1;
 293         break;
 294     case I_DW:
 295         wsize = 2;
 296         break;
 297     case I_DD:
 298         wsize = 4;
 299         break;
 300     case I_DQ:
 301         wsize = 8;
 302         break;
 303     case I_DT:
 304         wsize = 10;
 305         break;
 306     case I_DO:
 307         wsize = 16;
 308         break;
 309     case I_DY:
 310         wsize = 32;
 311         break;
 312     default:
 313         break;
 314     }
 315
 316     if (wsize) {
 317         extop *e;
 318         int32_t t = instruction->times;
 319         if (t < 0)
 320             errfunc(ERR_PANIC,
 321                     "instruction->times < 0 (%ld) in assemble()", t);
 322
 323         while (t--) {           /* repeat TIMES times */
 324             for (e = instruction->eops; e; e = e->next) {
 325                 if (e->type == EOT_DB_NUMBER) {
 326                     if (wsize == 1) {
 327                         if (e->segment != NO_SEG)
 328                             errfunc(ERR_NONFATAL,
 329                                     "one-byte relocation attempted");
 330                         else {
 331                             uint8_t out_byte = e->offset;
 332                             out(offset, segment, &out_byte,
 333                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 334                         }
 335                     } else if (wsize > 8) {
 336                         errfunc(ERR_NONFATAL,
 337                                 "integer supplied to a DT, DO or DY"
 338                                 " instruction");
 339                     } else
 340                         out(offset, segment, &e->offset,
 341                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 342                     offset += wsize;
 343                 } else if (e->type == EOT_DB_STRING ||
 344                            e->type == EOT_DB_STRING_FREE) {
 345                     int align;
 346
 347                     out(offset, segment, e->stringval,
 348                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 349                     align = e->stringlen % wsize;
 350
 351                     if (align) {
 352                         align = wsize - align;
 353                         out(offset, segment, const_zero_buf,
 354                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 355                     }
 356                     offset += e->stringlen + align;
 357                 }
 358             }
 359             if (t > 0 && t == instruction->times - 1) {
 360                 /*
 361                  * Dummy call to list->output to give the offset to the
 362                  * listing module.
 363                  */
 364                 list->output(offset, NULL, OUT_RAWDATA, 0);
 365                 list->uplevel(LIST_TIMES);
 366             }
 367         }
 368         if (instruction->times > 1)
 369             list->downlevel(LIST_TIMES);
 370         return offset - start;
 371     }
 372
 373     if (instruction->opcode == I_INCBIN) {
 374         const char *fname = instruction->eops->stringval;
 375         FILE *fp;
 376
 377         fp = fopen(fname, "rb");
 378         if (!fp) {
 379             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 380                   fname);
 381         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 382             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 383                   fname);
 384         } else {
 385             static char buf[4096];
 386             size_t t = instruction->times;
 387             size_t base = 0;
 388             size_t len;
 389
 390             len = ftell(fp);
 391             if (instruction->eops->next) {
 392                 base = instruction->eops->next->offset;
 393                 len -= base;
 394                 if (instruction->eops->next->next &&
 395                     len > (size_t)instruction->eops->next->next->offset)
 396                     len = (size_t)instruction->eops->next->next->offset;
 397             }
 398             /*
 399              * Dummy call to list->output to give the offset to the
 400              * listing module.
 401              */
 402             list->output(offset, NULL, OUT_RAWDATA, 0);
 403             list->uplevel(LIST_INCBIN);
 404             while (t--) {
 405                 size_t l;
 406
 407                 fseek(fp, base, SEEK_SET);
 408                 l = len;
 409                 while (l > 0) {
 410                     int32_t m =
 411                         fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
 412                               fp);
 413                     if (!m) {
 414                         /*
 415                          * This shouldn't happen unless the file
 416                          * actually changes while we are reading
 417                          * it.
 418                          */
 419                         error(ERR_NONFATAL,
 420                               "`incbin': unexpected EOF while"
 421                               " reading file `%s'", fname);
 422                         t = 0;  /* Try to exit cleanly */
 423                         break;
 424                     }
 425                     out(offset, segment, buf, OUT_RAWDATA, m,
 426                         NO_SEG, NO_SEG);
 427                     l -= m;
 428                 }
 429             }
 430             list->downlevel(LIST_INCBIN);
 431             if (instruction->times > 1) {
 432                 /*
 433                  * Dummy call to list->output to give the offset to the
 434                  * listing module.
 435                  */
 436                 list->output(offset, NULL, OUT_RAWDATA, 0);
 437                 list->uplevel(LIST_TIMES);
 438                 list->downlevel(LIST_TIMES);
 439             }
 440             fclose(fp);
 441             return instruction->times * len;
 442         }
 443         return 0;               /* if we're here, there's an error */
 444     }
 445
 446     /* Check to see if we need an address-size prefix */
 447     add_asp(instruction, bits);
 448
 449     size_prob = false;
 450
 451     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 452         int m = matches(temp, instruction, bits);
 453         if (m == 100 ||
 454             (m == 99 && jmp_match(segment, offset, bits,
 455                                   instruction, temp->code))) {
 456             /* Matches! */
 457             int64_t insn_size = calcsize(segment, offset, bits,
 458                                       instruction, temp->code);
 459             itimes = instruction->times;
 460             if (insn_size < 0)  /* shouldn't be, on pass two */
 461                 error(ERR_PANIC, "errors made it through from pass one");
 462             else
 463                 while (itimes--) {
 464                     for (j = 0; j < MAXPREFIX; j++) {
 465                         uint8_t c = 0;
 466                         switch (instruction->prefixes[j]) {
 467                         case P_LOCK:
 468                             c = 0xF0;
 469                             break;
 470                         case P_REPNE:
 471                         case P_REPNZ:
 472                             c = 0xF2;
 473                             break;
 474                         case P_REPE:
 475                         case P_REPZ:
 476                         case P_REP:
 477                             c = 0xF3;
 478                             break;
 479                         case R_CS:
 480                             if (bits == 64) {
 481                                 error(ERR_WARNING | ERR_PASS2,
 482                                       "cs segment base generated, but will be ignored in 64-bit mode");
 483                             }
 484                             c = 0x2E;
 485                             break;
 486                         case R_DS:
 487                             if (bits == 64) {
 488                                 error(ERR_WARNING | ERR_PASS2,
 489                                       "ds segment base generated, but will be ignored in 64-bit mode");
 490                             }
 491                             c = 0x3E;
 492                             break;
 493                         case R_ES:
 494                            if (bits == 64) {
 495                                 error(ERR_WARNING | ERR_PASS2,
 496                                       "es segment base generated, but will be ignored in 64-bit mode");
 497                            }
 498                             c = 0x26;
 499                             break;
 500                         case R_FS:
 501                             c = 0x64;
 502                             break;
 503                         case R_GS:
 504                             c = 0x65;
 505                             break;
 506                         case R_SS:
 507                             if (bits == 64) {
 508                                 error(ERR_WARNING | ERR_PASS2,
 509                                       "ss segment base generated, but will be ignored in 64-bit mode");
 510                             }
 511                             c = 0x36;
 512                             break;
 513                         case R_SEGR6:
 514                         case R_SEGR7:
 515                             error(ERR_NONFATAL,
 516                                   "segr6 and segr7 cannot be used as prefixes");
 517                             break;
 518                         case P_A16:
 519                             if (bits == 64) {
 520                                 error(ERR_NONFATAL,
 521                                       "16-bit addressing is not supported "
 522                                       "in 64-bit mode");
 523                             } else if (bits != 16)
 524                                 c = 0x67;
 525                             break;
 526                         case P_A32:
 527                             if (bits != 32)
 528                                 c = 0x67;
 529                             break;
 530                         case P_A64:
 531                             if (bits != 64) {
 532                                 error(ERR_NONFATAL,
 533                                       "64-bit addressing is only supported "
 534                                       "in 64-bit mode");
 535                             }
 536                             break;
 537                         case P_ASP:
 538                             c = 0x67;
 539                             break;
 540                         case P_O16:
 541                             if (bits != 16)
 542                                 c = 0x66;
 543                             break;
 544                         case P_O32:
 545                             if (bits == 16)
 546                                 c = 0x66;
 547                             break;
 548                         case P_O64:
 549                             /* REX.W */
 550                             break;
 551                         case P_OSP:
 552                             c = 0x66;
 553                             break;
 554                         case P_none:
 555                             break;
 556                         default:
 557                             error(ERR_PANIC, "invalid instruction prefix");
 558                         }
 559                         if (c != 0) {
 560                             out(offset, segment, &c, OUT_RAWDATA, 1,
 561                                 NO_SEG, NO_SEG);
 562                             offset++;
 563                         }
 564                     }
 565                     insn_end = offset + insn_size;
 566                     gencode(segment, offset, bits, instruction,
 567                             temp, insn_end);
 568                     offset += insn_size;
 569                     if (itimes > 0 && itimes == instruction->times - 1) {
 570                         /*
 571                          * Dummy call to list->output to give the offset to the
 572                          * listing module.
 573                          */
 574                         list->output(offset, NULL, OUT_RAWDATA, 0);
 575                         list->uplevel(LIST_TIMES);
 576                     }
 577                 }
 578             if (instruction->times > 1)
 579                 list->downlevel(LIST_TIMES);
 580             return offset - start;
 581         } else if (m > 0 && m > size_prob) {
 582             size_prob = m;
 583         }
 584     }
 585
 586     if (temp->opcode == -1) {   /* didn't match any instruction */
 587         switch (size_prob) {
 588         case 1:
 589             error(ERR_NONFATAL, "operation size not specified");
 590             break;
 591         case 2:
 592             error(ERR_NONFATAL, "mismatch in operand sizes");
 593             break;
 594         case 3:
 595             error(ERR_NONFATAL, "no instruction for this cpu level");
 596             break;
 597         case 4:
 598             error(ERR_NONFATAL, "instruction not supported in 64-bit mode");
 599             break;
 600         default:
 601             error(ERR_NONFATAL,
 602                   "invalid combination of opcode and operands");
 603             break;
 604         }
 605     }
 606     return 0;
 607 }
 608
 609 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 610                insn * instruction, efunc error)
 611 {
 612     const struct itemplate *temp;
 613
 614     errfunc = error;            /* to pass to other functions */
 615     cpu = cp;
 616
 617     if (instruction->opcode == -1)
 618         return 0;
 619
 620     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 621         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 622         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 623         instruction->opcode == I_DY) {
 624         extop *e;
 625         int32_t isize, osize, wsize = 0;   /* placate gcc */
 626
 627         isize = 0;
 628         switch (instruction->opcode) {
 629         case I_DB:
 630             wsize = 1;
 631             break;
 632         case I_DW:
 633             wsize = 2;
 634             break;
 635         case I_DD:
 636             wsize = 4;
 637             break;
 638         case I_DQ:
 639             wsize = 8;
 640             break;
 641         case I_DT:
 642             wsize = 10;
 643             break;
 644         case I_DO:
 645             wsize = 16;
 646             break;
 647         case I_DY:
 648             wsize = 32;
 649             break;
 650         default:
 651             break;
 652         }
 653
 654         for (e = instruction->eops; e; e = e->next) {
 655             int32_t align;
 656
 657             osize = 0;
 658             if (e->type == EOT_DB_NUMBER)
 659                 osize = 1;
 660             else if (e->type == EOT_DB_STRING ||
 661                      e->type == EOT_DB_STRING_FREE)
 662                 osize = e->stringlen;
 663
 664             align = (-osize) % wsize;
 665             if (align < 0)
 666                 align += wsize;
 667             isize += osize + align;
 668         }
 669         return isize * instruction->times;
 670     }
 671
 672     if (instruction->opcode == I_INCBIN) {
 673         const char *fname = instruction->eops->stringval;
 674         FILE *fp;
 675         size_t len;
 676
 677         fp = fopen(fname, "rb");
 678         if (!fp)
 679             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 680                   fname);
 681         else if (fseek(fp, 0L, SEEK_END) < 0)
 682             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 683                   fname);
 684         else {
 685             len = ftell(fp);
 686             fclose(fp);
 687             if (instruction->eops->next) {
 688                 len -= instruction->eops->next->offset;
 689                 if (instruction->eops->next->next &&
 690                     len > (size_t)instruction->eops->next->next->offset) {
 691                     len = (size_t)instruction->eops->next->next->offset;
 692                 }
 693             }
 694             return instruction->times * len;
 695         }
 696         return 0;               /* if we're here, there's an error */
 697     }
 698
 699     /* Check to see if we need an address-size prefix */
 700     add_asp(instruction, bits);
 701
 702     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 703         int m = matches(temp, instruction, bits);
 704         if (m == 100 ||
 705             (m == 99 && jmp_match(segment, offset, bits,
 706                                   instruction, temp->code))) {
 707             /* we've matched an instruction. */
 708             int64_t isize;
 709             const uint8_t *codes = temp->code;
 710             int j;
 711
 712             isize = calcsize(segment, offset, bits, instruction, codes);
 713             if (isize < 0)
 714                 return -1;
 715             for (j = 0; j < MAXPREFIX; j++) {
 716                 switch (instruction->prefixes[j]) {
 717                 case P_A16:
 718                     if (bits != 16)
 719                         isize++;
 720                     break;
 721                 case P_A32:
 722                     if (bits != 32)
 723                         isize++;
 724                     break;
 725                 case P_O16:
 726                     if (bits != 16)
 727                         isize++;
 728                     break;
 729                 case P_O32:
 730                     if (bits == 16)
 731                         isize++;
 732                     break;
 733                 case P_A64:
 734                 case P_O64:
 735                 case P_none:
 736                     break;
 737                 default:
 738                     isize++;
 739                     break;
 740                 }
 741             }
 742             return isize * instruction->times;
 743         }
 744     }
 745     return -1;                  /* didn't match any instruction */
 746 }
 747
 748 static bool possible_sbyte(operand *o)
 749 {
 750     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 751         !(o->opflags & OPFLAG_FORWARD) &&
 752         optimizing >= 0 && !(o->type & STRICT);
 753 }
 754
 755 /* check that opn[op]  is a signed byte of size 16 or 32 */
 756 static bool is_sbyte16(operand *o)
 757 {
 758     int16_t v;
 759
 760     if (!possible_sbyte(o))
 761         return false;
 762
 763     v = o->offset;
 764     return v >= -128 && v <= 127;
 765 }
 766
 767 static bool is_sbyte32(operand *o)
 768 {
 769     int32_t v;
 770
 771     if (!possible_sbyte(o))
 772         return false;
 773
 774     v = o->offset;
 775     return v >= -128 && v <= 127;
 776 }
 777
 778 /* Common construct */
 779 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 780
 781 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 782                         insn * ins, const uint8_t *codes)
 783 {
 784     int64_t length = 0;
 785     uint8_t c;
 786     int rex_mask = ~0;
 787     struct operand *opx;
 788
 789     ins->rex = 0;               /* Ensure REX is reset */
 790
 791     if (ins->prefixes[PPS_OSIZE] == P_O64)
 792         ins->rex |= REX_W;
 793
 794     (void)segment;              /* Don't warn that this parameter is unused */
 795     (void)offset;               /* Don't warn that this parameter is unused */
 796
 797     while (*codes) {
 798         c = *codes++;
 799         opx = &ins->oprs[c & 3];
 800         switch (c) {
 801         case 01:
 802         case 02:
 803         case 03:
 804             codes += c, length += c;
 805             break;
 806
 807         case4(010):
 808             ins->rex |=
 809                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 810             codes++, length++;
 811             break;
 812
 813         case4(014):
 814         case4(020):
 815         case4(024):
 816             length++;
 817             break;
 818
 819         case4(030):
 820             length += 2;
 821             break;
 822
 823         case4(034):
 824             if (opx->type & (BITS16 | BITS32 | BITS64))
 825                 length += (opx->type & BITS16) ? 2 : 4;
 826             else
 827                 length += (bits == 16) ? 2 : 4;
 828             break;
 829
 830         case4(040):
 831             length += 4;
 832             break;
 833
 834         case4(044):
 835             length += ins->addr_size >> 3;
 836             break;
 837
 838         case4(050):
 839             length++;
 840             break;
 841
 842         case4(054):
 843             length += 8; /* MOV reg64/imm */
 844             break;
 845
 846         case4(060):
 847             length += 2;
 848             break;
 849
 850         case4(064):
 851             if (opx->type & (BITS16 | BITS32 | BITS64))
 852                 length += (opx->type & BITS16) ? 2 : 4;
 853             else
 854                 length += (bits == 16) ? 2 : 4;
 855             break;
 856
 857         case4(070):
 858             length += 4;
 859             break;
 860
 861         case4(074):
 862             length += 2;
 863             break;
 864
 865         case4(0140):
 866             length += is_sbyte16(opx) ? 1 : 2;
 867             break;
 868
 869         case4(0144):
 870             codes++;
 871             length++;
 872             break;
 873
 874         case4(0150):
 875             length += is_sbyte32(opx) ? 1 : 4;
 876             break;
 877
 878         case4(0154):
 879             codes++;
 880             length++;
 881             break;
 882
 883         case4(0160):
 884             length++;
 885             ins->rex |= REX_D;
 886             ins->drexdst = regval(opx);
 887             break;
 888
 889         case4(0164):
 890             length++;
 891             ins->rex |= REX_D|REX_OC;
 892             ins->drexdst = regval(opx);
 893             break;
 894
 895         case 0171:
 896             break;
 897
 898         case 0172:
 899         case 0173:
 900         case 0174:
 901             codes++;
 902             length++;
 903             break;
 904
 905         case4(0250):
 906             length += is_sbyte32(opx) ? 1 : 4;
 907             break;
 908
 909         case4(0254):
 910             length += 4;
 911             break;
 912
 913         case4(0260):
 914             ins->rex |= REX_V;
 915             ins->drexdst = regval(opx);
 916             ins->vex_m = *codes++;
 917             ins->vex_wlp = *codes++;
 918             break;
 919
 920         case 0270:
 921             ins->rex |= REX_V;
 922             ins->drexdst = 0;
 923             ins->vex_m = *codes++;
 924             ins->vex_wlp = *codes++;
 925             break;
 926
 927         case4(0274):
 928             length++;
 929             break;
 930
 931         case4(0300):
 932             break;
 933
 934         case 0310:
 935             if (bits == 64)
 936                 return -1;
 937             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 938             break;
 939
 940         case 0311:
 941             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 942             break;
 943
 944         case 0312:
 945             break;
 946
 947         case 0313:
 948             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 949                 has_prefix(ins, PPS_ASIZE, P_A32))
 950                 return -1;
 951             break;
 952
 953         case4(0314):
 954             break;
 955
 956         case 0320:
 957             length += (bits != 16);
 958             break;
 959
 960         case 0321:
 961             length += (bits == 16);
 962             break;
 963
 964         case 0322:
 965             break;
 966
 967         case 0323:
 968             rex_mask &= ~REX_W;
 969             break;
 970
 971         case 0324:
 972             ins->rex |= REX_W;
 973             break;
 974
 975         case 0330:
 976             codes++, length++;
 977             break;
 978
 979         case 0331:
 980             break;
 981
 982         case 0332:
 983         case 0333:
 984             length++;
 985             break;
 986
 987         case 0334:
 988             ins->rex |= REX_L;
 989             break;
 990
 991         case 0335:
 992             break;
 993
 994         case 0336:
 995             if (!ins->prefixes[PPS_LREP])
 996                 ins->prefixes[PPS_LREP] = P_REP;
 997             break;
 998
 999         case 0337:
1000             if (!ins->prefixes[PPS_LREP])
1001                 ins->prefixes[PPS_LREP] = P_REPNE;
1002             break;
1003
1004         case 0340:
1005             if (ins->oprs[0].segment != NO_SEG)
1006                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1007                         " quantity of BSS space");
1008             else
1009                 length += ins->oprs[0].offset;
1010             break;
1011
1012         case4(0344):
1013             length++;
1014             break;
1015
1016         case 0360:
1017             break;
1018
1019         case 0361:
1020         case 0362:
1021         case 0363:
1022             length++;
1023             break;
1024
1025         case 0364:
1026         case 0365:
1027             break;
1028
1029         case 0366:
1030         case 0367:
1031             length++;
1032             break;
1033
1034         case 0370:
1035         case 0371:
1036         case 0372:
1037             break;
1038
1039         case 0373:
1040             length++;
1041             break;
1042
1043         case4(0100):
1044         case4(0110):
1045         case4(0120):
1046         case4(0130):
1047         case4(0200):
1048         case4(0204):
1049         case4(0210):
1050         case4(0214):
1051         case4(0220):
1052         case4(0224):
1053         case4(0230):
1054         case4(0234):
1055             {
1056                 ea ea_data;
1057                 int rfield;
1058                 int32_t rflags;
1059                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1060
1061                 if (c <= 0177) {
1062                     /* pick rfield from operand b */
1063                     rflags = regflag(&ins->oprs[c & 7]);
1064                     rfield = nasm_regvals[ins->oprs[c & 7].basereg];
1065                 } else {
1066                     rflags = 0;
1067                     rfield = c & 7;
1068                 }
1069
1070                 if (!process_ea
1071                     (&ins->oprs[(c >> 3) & 7], &ea_data, bits,
1072                      ins->addr_size, rfield, rflags)) {
1073                     errfunc(ERR_NONFATAL, "invalid effective address");
1074                     return -1;
1075                 } else {
1076                     ins->rex |= ea_data.rex;
1077                     length += ea_data.size;
1078                 }
1079             }
1080             break;
1081
1082         default:
1083             errfunc(ERR_PANIC, "internal instruction table corrupt"
1084                     ": instruction code 0x%02X given", c);
1085             break;
1086         }
1087     }
1088
1089     ins->rex &= rex_mask;
1090
1091     if (ins->rex & REX_V) {
1092         int bad32 = REX_R|REX_W|REX_X|REX_B;
1093
1094         if (ins->rex & REX_H) {
1095             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1096             return -1;
1097         }
1098         switch (ins->vex_wlp & 030) {
1099         case 000:
1100         case 020:
1101             ins->rex &= ~REX_W;
1102             break;
1103         case 010:
1104             ins->rex |= REX_W;
1105             bad32 &= ~REX_W;
1106             break;
1107         case 030:
1108             /* Follow REX_W */
1109             break;
1110         }
1111
1112         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1113             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1114             return -1;
1115         }
1116         if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1117             length += 3;
1118         else
1119             length += 2;
1120     } else if (ins->rex & REX_D) {
1121         if (ins->rex & REX_H) {
1122             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1123             return -1;
1124         }
1125         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1126                            ins->drexdst > 7)) {
1127             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1128             return -1;
1129         }
1130         length++;
1131     } else if (ins->rex & REX_REAL) {
1132         if (ins->rex & REX_H) {
1133             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1134             return -1;
1135         } else if (bits == 64) {
1136             length++;
1137         } else if ((ins->rex & REX_L) &&
1138                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1139                    cpu >= IF_X86_64) {
1140             /* LOCK-as-REX.R */
1141             assert_no_prefix(ins, PPS_LREP);
1142             length++;
1143         } else {
1144             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1145             return -1;
1146         }
1147     }
1148
1149     return length;
1150 }
1151
/*
 * Emit the pending REX prefix byte, if one is due.
 *
 * A byte is written only when assembling for 64 bits, at least one of
 * the REX_REAL bits is set in ins->rex, and the instruction is not
 * using DREX or VEX (REX_D / REX_V clear).  The byte written is the
 * REX_REAL bits combined with REX_P.  Afterwards ins->rex is cleared,
 * so a later invocation for the same instruction emits nothing, and
 * `offset` is advanced by one.
 *
 * Expects `ins`, `bits`, `offset` and `segment` to be in scope at the
 * point of use.  Wrapped in do { } while (0) so that `EMIT_REX();`
 * behaves as exactly one statement, including inside an unbraced
 * if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&     \
            (bits == 64)) {                                             \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1,             \
                NO_SEG, NO_SEG);                                        \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1159
1160 static void gencode(int32_t segment, int64_t offset, int bits,
1161                     insn * ins, const struct itemplate *temp,
1162                     int64_t insn_end)
1163 {
1164     static char condval[] = {   /* conditional opcodes */
1165         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1166         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1167         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1168     };
1169     uint8_t c;
1170     uint8_t bytes[4];
1171     int64_t size;
1172     int64_t data;
1173     struct operand *opx;
1174     const uint8_t *codes = temp->code;
1175
1176     while (*codes) {
1177         c = *codes++;
1178         opx = &ins->oprs[c & 3];
1179         switch (c) {
1180         case 01:
1181         case 02:
1182         case 03:
1183             EMIT_REX();
1184             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1185             codes += c;
1186             offset += c;
1187             break;
1188
1189         case4(010):
1190             EMIT_REX();
1191             bytes[0] = *codes++ + ((regval(opx)) & 7);
1192             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1193             offset += 1;
1194             break;
1195
1196         case4(014):
1197             /* The test for BITS8 and SBYTE here is intended to avoid
1198                warning on optimizer actions due to SBYTE, while still
1199                warn on explicit BYTE directives.  Also warn, obviously,
1200                if the optimizer isn't enabled. */
1201             if (((opx->type & BITS8) ||
1202                  !(opx->type & temp->opd[c & 3] & BYTENESS)) &&
1203                 (opx->offset < -128 || opx->offset > 127)) {
1204                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1205                         "signed byte value exceeds bounds");
1206             }
1207             if (opx->segment != NO_SEG) {
1208                 data = opx->offset;
1209                 out(offset, segment, &data, OUT_ADDRESS, 1,
1210                     opx->segment, opx->wrt);
1211             } else {
1212                 bytes[0] = opx->offset;
1213                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1214                     NO_SEG);
1215             }
1216             offset += 1;
1217             break;
1218
1219         case4(020):
1220             if (opx->offset < -256 || opx->offset > 255) {
1221                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1222                         "byte value exceeds bounds");
1223             }
1224             if (opx->segment != NO_SEG) {
1225                 data = opx->offset;
1226                 out(offset, segment, &data, OUT_ADDRESS, 1,
1227                     opx->segment, opx->wrt);
1228             } else {
1229                 bytes[0] = opx->offset;
1230                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1231                     NO_SEG);
1232             }
1233             offset += 1;
1234             break;
1235
1236         case4(024):
1237             if (opx->offset < 0 || opx->offset > 255)
1238                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1239                         "unsigned byte value exceeds bounds");
1240             if (opx->segment != NO_SEG) {
1241                 data = opx->offset;
1242                 out(offset, segment, &data, OUT_ADDRESS, 1,
1243                     opx->segment, opx->wrt);
1244             } else {
1245                 bytes[0] = opx->offset;
1246                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1247                     NO_SEG);
1248             }
1249             offset += 1;
1250             break;
1251
1252         case4(030):
1253             warn_overflow(2, opx);
1254             data = opx->offset;
1255             out(offset, segment, &data, OUT_ADDRESS, 2,
1256                 opx->segment, opx->wrt);
1257             offset += 2;
1258             break;
1259
1260         case4(034):
1261             if (opx->type & (BITS16 | BITS32))
1262                 size = (opx->type & BITS16) ? 2 : 4;
1263             else
1264                 size = (bits == 16) ? 2 : 4;
1265             warn_overflow(size, opx);
1266             data = opx->offset;
1267             out(offset, segment, &data, OUT_ADDRESS, size,
1268                 opx->segment, opx->wrt);
1269             offset += size;
1270             break;
1271
1272         case4(040):
1273             warn_overflow(4, opx);
1274             data = opx->offset;
1275             out(offset, segment, &data, OUT_ADDRESS, 4,
1276                 opx->segment, opx->wrt);
1277             offset += 4;
1278             break;
1279
1280         case4(044):
1281             data = opx->offset;
1282             size = ins->addr_size >> 3;
1283             warn_overflow(size, opx);
1284             out(offset, segment, &data, OUT_ADDRESS, size,
1285                 opx->segment, opx->wrt);
1286             offset += size;
1287             break;
1288
1289         case4(050):
1290             if (opx->segment != segment)
1291                 errfunc(ERR_NONFATAL,
1292                         "short relative jump outside segment");
1293             data = opx->offset - insn_end;
1294             if (data > 127 || data < -128)
1295                 errfunc(ERR_NONFATAL, "short jump is out of range");
1296             bytes[0] = data;
1297             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1298             offset += 1;
1299             break;
1300
1301         case4(054):
1302             data = (int64_t)opx->offset;
1303             out(offset, segment, &data, OUT_ADDRESS, 8,
1304                 opx->segment, opx->wrt);
1305             offset += 8;
1306             break;
1307
1308         case4(060):
1309             if (opx->segment != segment) {
1310                 data = opx->offset;
1311                 out(offset, segment, &data,
1312                     OUT_REL2ADR, insn_end - offset,
1313                     opx->segment, opx->wrt);
1314             } else {
1315                 data = opx->offset - insn_end;
1316                 out(offset, segment, &data,
1317                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1318             }
1319             offset += 2;
1320             break;
1321
1322         case4(064):
1323             if (opx->type & (BITS16 | BITS32 | BITS64))
1324                 size = (opx->type & BITS16) ? 2 : 4;
1325             else
1326                 size = (bits == 16) ? 2 : 4;
1327             if (opx->segment != segment) {
1328                 data = opx->offset;
1329                 out(offset, segment, &data,
1330                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1331                     insn_end - offset, opx->segment, opx->wrt);
1332             } else {
1333                 data = opx->offset - insn_end;
1334                 out(offset, segment, &data,
1335                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1336             }
1337             offset += size;
1338             break;
1339
1340         case4(070):
1341             if (opx->segment != segment) {
1342                 data = opx->offset;
1343                 out(offset, segment, &data,
1344                     OUT_REL4ADR, insn_end - offset,
1345                     opx->segment, opx->wrt);
1346             } else {
1347                 data = opx->offset - insn_end;
1348                 out(offset, segment, &data,
1349                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1350             }
1351             offset += 4;
1352             break;
1353
1354         case4(074):
1355             if (opx->segment == NO_SEG)
1356                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1357                         " relocatable");
1358             data = 0;
1359             out(offset, segment, &data, OUT_ADDRESS, 2,
1360                 outfmt->segbase(1 + opx->segment),
1361                 opx->wrt);
1362             offset += 2;
1363             break;
1364
1365         case4(0140):
1366             data = opx->offset;
1367             warn_overflow(2, opx);
1368             if (is_sbyte16(opx)) {
1369                 bytes[0] = data;
1370                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1371                     NO_SEG);
1372                 offset++;
1373             } else {
1374                 out(offset, segment, &data, OUT_ADDRESS, 2,
1375                     opx->segment, opx->wrt);
1376                 offset += 2;
1377             }
1378             break;
1379
1380         case4(0144):
1381             EMIT_REX();
1382             bytes[0] = *codes++;
1383             if (is_sbyte16(opx))
1384                 bytes[0] |= 2;  /* s-bit */
1385             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1386             offset++;
1387             break;
1388
1389         case4(0150):
1390             data = opx->offset;
1391             warn_overflow(4, opx);
1392             if (is_sbyte32(opx)) {
1393                 bytes[0] = data;
1394                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1395                     NO_SEG);
1396                 offset++;
1397             } else {
1398                 out(offset, segment, &data, OUT_ADDRESS, 4,
1399                     opx->segment, opx->wrt);
1400                 offset += 4;
1401             }
1402             break;
1403
1404         case4(0154):
1405             EMIT_REX();
1406             bytes[0] = *codes++;
1407             if (is_sbyte32(opx))
1408                 bytes[0] |= 2;  /* s-bit */
1409             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1410             offset++;
1411             break;
1412
1413         case4(0160):
1414         case4(0164):
1415             break;
1416
1417         case 0171:
1418             bytes[0] =
1419                 (ins->drexdst << 4) |
1420                 (ins->rex & REX_OC ? 0x08 : 0) |
1421                 (ins->rex & (REX_R|REX_X|REX_B));
1422             ins->rex = 0;
1423             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1424             offset++;
1425             break;
1426
1427         case 0172:
1428             c = *codes++;
1429             opx = &ins->oprs[c >> 3];
1430             bytes[0] = nasm_regvals[opx->basereg] << 4;
1431             opx = &ins->oprs[c & 7];
1432             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1433                 errfunc(ERR_NONFATAL,
1434                         "non-absolute expression not permitted as argument %d",
1435                         c & 7);
1436             } else {
1437                 if (opx->offset & ~15) {
1438                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1439                             "four-bit argument exceeds bounds");
1440                 }
1441                 bytes[0] |= opx->offset & 15;
1442             }
1443             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1444             offset++;
1445             break;
1446
1447         case 0173:
1448             c = *codes++;
1449             opx = &ins->oprs[c >> 4];
1450             bytes[0] = nasm_regvals[opx->basereg] << 4;
1451             bytes[0] |= c & 15;
1452             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1453             offset++;
1454             break;
1455
1456         case 0174:
1457             c = *codes++;
1458             opx = &ins->oprs[c];
1459             bytes[0] = nasm_regvals[opx->basereg] << 4;
1460             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1461             offset++;
1462             break;
1463
1464         case4(0250):
1465             data = opx->offset;
1466             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1467                 (int32_t)data != (int64_t)data) {
1468                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1469                         "signed dword immediate exceeds bounds");
1470             }
1471             if (is_sbyte32(opx)) {
1472                 bytes[0] = data;
1473                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1474                     NO_SEG);
1475                 offset++;
1476             } else {
1477                 out(offset, segment, &data, OUT_ADDRESS, 4,
1478                     opx->segment, opx->wrt);
1479                 offset += 4;
1480             }
1481             break;
1482
1483         case4(0254):
1484             data = opx->offset;
1485             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1486                 (int32_t)data != (int64_t)data) {
1487                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1488                         "signed dword immediate exceeds bounds");
1489             }
1490             out(offset, segment, &data, OUT_ADDRESS, 4,
1491                 opx->segment, opx->wrt);
1492             offset += 4;
1493             break;
1494
1495         case4(0260):
1496         case 0270:
1497             codes += 2;
1498             if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1499                 bytes[0] = 0xc4;
1500                 bytes[1] = ins->vex_m | ((~ins->rex & 7) << 5);
1501                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1502                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1503                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1504                 offset += 3;
1505             } else {
1506                 bytes[0] = 0xc5;
1507                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1508                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1509                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1510                 offset += 2;
1511             }
1512             break;
1513
1514         case4(0274):
1515         {
1516             uint64_t uv, um;
1517             int s;
1518
1519             if (ins->rex & REX_W)
1520                 s = 64;
1521             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1522                 s = 16;
1523             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1524                 s = 32;
1525             else
1526                 s = bits;
1527
1528             um = (uint64_t)2 << (s-1);
1529             uv = opx->offset;
1530
1531             if (uv > 127 && uv < (uint64_t)-128 &&
1532                 (uv < um-128 || uv > um-1)) {
1533                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1534                         "signed byte value exceeds bounds");
1535             }
1536             if (opx->segment != NO_SEG) {
1537                 data = uv;
1538                 out(offset, segment, &data, OUT_ADDRESS, 1,
1539                     opx->segment, opx->wrt);
1540             } else {
1541                 bytes[0] = uv;
1542                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1543                     NO_SEG);
1544             }
1545             offset += 1;
1546             break;
1547         }
1548
1549         case4(0300):
1550             break;
1551
1552         case 0310:
1553             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1554                 *bytes = 0x67;
1555                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1556                 offset += 1;
1557             } else
1558                 offset += 0;
1559             break;
1560
1561         case 0311:
1562             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1563                 *bytes = 0x67;
1564                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1565                 offset += 1;
1566             } else
1567                 offset += 0;
1568             break;
1569
1570         case 0312:
1571             break;
1572
1573         case 0313:
1574             ins->rex = 0;
1575             break;
1576
1577         case4(0314):
1578             break;
1579
1580         case 0320:
1581             if (bits != 16) {
1582                 *bytes = 0x66;
1583                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1584                 offset += 1;
1585             } else
1586                 offset += 0;
1587             break;
1588
1589         case 0321:
1590             if (bits == 16) {
1591                 *bytes = 0x66;
1592                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1593                 offset += 1;
1594             } else
1595                 offset += 0;
1596             break;
1597
1598         case 0322:
1599         case 0323:
1600             break;
1601
1602         case 0324:
1603             ins->rex |= REX_W;
1604             break;
1605
1606         case 0330:
1607             *bytes = *codes++ ^ condval[ins->condition];
1608             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1609             offset += 1;
1610             break;
1611
1612         case 0331:
1613             break;
1614
1615         case 0332:
1616         case 0333:
1617             *bytes = c - 0332 + 0xF2;
1618             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1619             offset += 1;
1620             break;
1621
1622         case 0334:
1623             if (ins->rex & REX_R) {
1624                 *bytes = 0xF0;
1625                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1626                 offset += 1;
1627             }
1628             ins->rex &= ~(REX_L|REX_R);
1629             break;
1630
1631         case 0335:
1632             break;
1633
1634         case 0336:
1635         case 0337:
1636             break;
1637
1638         case 0340:
1639             if (ins->oprs[0].segment != NO_SEG)
1640                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1641             else {
1642                 int64_t size = ins->oprs[0].offset;
1643                 if (size > 0)
1644                     out(offset, segment, NULL,
1645                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1646                 offset += size;
1647             }
1648             break;
1649
1650         case 0344:
1651         case 0345:
1652             bytes[0] = c & 1;
1653             switch (ins->oprs[0].basereg) {
1654             case R_CS:
1655                 bytes[0] += 0x0E;
1656                 break;
1657             case R_DS:
1658                 bytes[0] += 0x1E;
1659                 break;
1660             case R_ES:
1661                 bytes[0] += 0x06;
1662                 break;
1663             case R_SS:
1664                 bytes[0] += 0x16;
1665                 break;
1666             default:
1667                 errfunc(ERR_PANIC,
1668                         "bizarre 8086 segment register received");
1669             }
1670             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1671             offset++;
1672             break;
1673
1674         case 0346:
1675         case 0347:
1676             bytes[0] = c & 1;
1677             switch (ins->oprs[0].basereg) {
1678             case R_FS:
1679                 bytes[0] += 0xA0;
1680                 break;
1681             case R_GS:
1682                 bytes[0] += 0xA8;
1683                 break;
1684             default:
1685                 errfunc(ERR_PANIC,
1686                         "bizarre 386 segment register received");
1687             }
1688             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1689             offset++;
1690             break;
1691
1692         case 0360:
1693             break;
1694
1695         case 0361:
1696             bytes[0] = 0x66;
1697             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1698             offset += 1;
1699             break;
1700
1701         case 0362:
1702         case 0363:
1703             bytes[0] = c - 0362 + 0xf2;
1704             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1705             offset += 1;
1706             break;
1707
1708         case 0364:
1709         case 0365:
1710             break;
1711
1712         case 0366:
1713         case 0367:
1714             *bytes = c - 0366 + 0x66;
1715             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1716             offset += 1;
1717             break;
1718
1719         case 0370:
1720         case 0371:
1721         case 0372:
1722             break;
1723
1724         case 0373:
1725             *bytes = bits == 16 ? 3 : 5;
1726             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1727             offset += 1;
1728             break;
1729
1730         case4(0100):
1731         case4(0110):
1732         case4(0120):
1733         case4(0130):
1734         case4(0200):
1735         case4(0204):
1736         case4(0210):
1737         case4(0214):
1738         case4(0220):
1739         case4(0224):
1740         case4(0230):
1741         case4(0234):
1742             {
1743                 ea ea_data;
1744                 int rfield;
1745                 int32_t rflags;
1746                 uint8_t *p;
1747                 int32_t s;
1748                 enum out_type type;
1749
1750                 if (c <= 0177) {
1751                     /* pick rfield from operand b */
1752                     rflags = regflag(&ins->oprs[c & 7]);
1753                     rfield = nasm_regvals[ins->oprs[c & 7].basereg];
1754                 } else {
1755                     /* rfield is constant */
1756                     rflags = 0;
1757                     rfield = c & 7;
1758                 }
1759
1760                 if (!process_ea
1761                     (&ins->oprs[(c >> 3) & 7], &ea_data, bits,
1762                      ins->addr_size, rfield, rflags)) {
1763                     errfunc(ERR_NONFATAL, "invalid effective address");
1764                 }
1765
1766
1767                 p = bytes;
1768                 *p++ = ea_data.modrm;
1769                 if (ea_data.sib_present)
1770                     *p++ = ea_data.sib;
1771
1772                 /* DREX suffixes come between the SIB and the displacement */
1773                 if (ins->rex & REX_D) {
1774                     *p++ =
1775                         (ins->drexdst << 4) |
1776                         (ins->rex & REX_OC ? 0x08 : 0) |
1777                         (ins->rex & (REX_R|REX_X|REX_B));
1778                     ins->rex = 0;
1779                 }
1780
1781                 s = p - bytes;
1782                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1783
1784                 /*
1785                  * Make sure the address gets the right offset in case
1786                  * the line breaks in the .lst file (BR 1197827)
1787                  */
1788                 offset += s;
1789                 s = 0;
1790
1791                 switch (ea_data.bytes) {
1792                 case 0:
1793                     break;
1794                 case 1:
1795                     if (ins->oprs[(c >> 3) & 7].segment != NO_SEG) {
1796                         data = ins->oprs[(c >> 3) & 7].offset;
1797                         out(offset, segment, &data, OUT_ADDRESS, 1,
1798                             ins->oprs[(c >> 3) & 7].segment,
1799                             ins->oprs[(c >> 3) & 7].wrt);
1800                     } else {
1801                         *bytes = ins->oprs[(c >> 3) & 7].offset;
1802                         out(offset, segment, bytes, OUT_RAWDATA, 1,
1803                             NO_SEG, NO_SEG);
1804                     }
1805                     s++;
1806                     break;
1807                 case 8:
1808                 case 2:
1809                 case 4:
1810                     data = ins->oprs[(c >> 3) & 7].offset;
1811                     warn_overflow(ea_data.bytes, opx);
1812                     s += ea_data.bytes;
1813                     if (ea_data.rip) {
1814                         data -= insn_end - (offset+ea_data.bytes);
1815                         type = OUT_REL4ADR;
1816                     } else {
1817                         type = OUT_ADDRESS;
1818                     }
1819                     out(offset, segment, &data, type, ea_data.bytes,
1820                         ins->oprs[(c >> 3) & 7].segment,
1821                         ins->oprs[(c >> 3) & 7].wrt);
1822                     break;
1823                 }
1824                 offset += s;
1825             }
1826             break;
1827
1828         default:
1829             errfunc(ERR_PANIC, "internal instruction table corrupt"
1830                     ": instruction code 0x%02X given", c);
1831             break;
1832         }
1833     }
1834 }
1835
1836 static int32_t regflag(const operand * o)
1837 {
1838     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1839         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1840     }
1841     return nasm_reg_flags[o->basereg];
1842 }
1843
1844 static int32_t regval(const operand * o)
1845 {
1846     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1847         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1848     }
1849     return nasm_regvals[o->basereg];
1850 }
1851
1852 static int op_rexflags(const operand * o, int mask)
1853 {
1854     int32_t flags;
1855     int val;
1856
1857     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1858         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1859     }
1860
1861     flags = nasm_reg_flags[o->basereg];
1862     val = nasm_regvals[o->basereg];
1863
1864     return rexflags(val, flags, mask);
1865 }
1866
1867 static int rexflags(int val, int32_t flags, int mask)
1868 {
1869     int rex = 0;
1870
1871     if (val >= 8)
1872         rex |= REX_B|REX_X|REX_R;
1873     if (flags & BITS64)
1874         rex |= REX_W;
1875     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1876         rex |= REX_H;
1877     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1878         rex |= REX_P;
1879
1880     return rex & mask;
1881 }
1882
1883 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1884 {
1885     int i, size[MAX_OPERANDS], asize, oprs, ret;
1886
1887     ret = 100;
1888
1889     /*
1890      * Check the opcode
1891      */
1892     if (itemp->opcode != instruction->opcode)
1893         return 0;
1894
1895     /*
1896      * Count the operands
1897      */
1898     if (itemp->operands != instruction->operands)
1899         return 0;
1900
1901     /*
1902      * Check that no spurious colons or TOs are present
1903      */
1904     for (i = 0; i < itemp->operands; i++)
1905         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1906             return 0;
1907
1908     /*
1909      * Process size flags
1910      */
1911     if (itemp->flags & IF_ARMASK) {
1912         memset(size, 0, sizeof size);
1913
1914         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
1915
1916         switch (itemp->flags & IF_SMASK) {
1917         case IF_SB:
1918             size[i] = BITS8;
1919             break;
1920         case IF_SW:
1921             size[i] = BITS16;
1922             break;
1923         case IF_SD:
1924             size[i] = BITS32;
1925             break;
1926         case IF_SQ:
1927             size[i] = BITS64;
1928             break;
1929         case IF_SO:
1930             size[i] = BITS128;
1931             break;
1932         case IF_SY:
1933             size[i] = BITS256;
1934             break;
1935         case IF_SZ:
1936             switch (bits) {
1937             case 16:
1938                 size[i] = BITS16;
1939                 break;
1940             case 32:
1941                 size[i] = BITS32;
1942                 break;
1943             case 64:
1944                 size[i] = BITS64;
1945                 break;
1946             }
1947             break;
1948         default:
1949             break;
1950         }
1951     } else {
1952         asize = 0;
1953         switch (itemp->flags & IF_SMASK) {
1954         case IF_SB:
1955             asize = BITS8;
1956             break;
1957         case IF_SW:
1958             asize = BITS16;
1959             break;
1960         case IF_SD:
1961             asize = BITS32;
1962             break;
1963         case IF_SQ:
1964             asize = BITS64;
1965             break;
1966         case IF_SO:
1967             asize = BITS128;
1968             break;
1969         case IF_SY:
1970             asize = BITS256;
1971             break;
1972         case IF_SZ:
1973             switch (bits) {
1974             case 16:
1975                 asize = BITS16;
1976                 break;
1977             case 32:
1978                 asize = BITS32;
1979                 break;
1980             case 64:
1981                 asize = BITS64;
1982                 break;
1983             }
1984             break;
1985         default:
1986             break;
1987         }
1988         for (i = 0; i < MAX_OPERANDS; i++)
1989             size[i] = asize;
1990     }
1991
1992     /*
1993      * Check that the operand flags all match up
1994      */
1995     for (i = 0; i < itemp->operands; i++) {
1996         int32_t type = instruction->oprs[i].type;
1997         if (!(type & SIZE_MASK))
1998             type |= size[i];
1999
2000         if (itemp->opd[i] & SAME_AS) {
2001             int j = itemp->opd[i] & ~SAME_AS;
2002             if (type != instruction->oprs[j].type ||
2003                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2004                 return 0;
2005         } else if (itemp->opd[i] & ~type ||
2006             ((itemp->opd[i] & SIZE_MASK) &&
2007              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2008             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2009                 (type & SIZE_MASK))
2010                 return 0;
2011             else
2012                 return 1;
2013         }
2014     }
2015
2016     /*
2017      * Check operand sizes
2018      */
2019     if (itemp->flags & (IF_SM | IF_SM2)) {
2020         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2021         asize = 0;
2022         for (i = 0; i < oprs; i++) {
2023             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2024                 int j;
2025                 for (j = 0; j < oprs; j++)
2026                     size[j] = asize;
2027                 break;
2028             }
2029         }
2030     } else {
2031         oprs = itemp->operands;
2032     }
2033
2034     for (i = 0; i < itemp->operands; i++) {
2035         if (!(itemp->opd[i] & SIZE_MASK) &&
2036             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2037             return 2;
2038     }
2039
2040     /*
2041      * Check template is okay at the set cpu level
2042      */
2043     if (((itemp->flags & IF_PLEVEL) > cpu))
2044         return 3;
2045
2046     /*
2047      * Check if instruction is available in long mode
2048      */
2049     if ((itemp->flags & IF_NOLONG) && (bits == 64))
2050         return 4;
2051
2052     /*
2053      * Check if special handling needed for Jumps
2054      */
2055     if ((uint8_t)(itemp->code[0]) >= 0370)
2056         return 99;
2057
2058     return ret;
2059 }
2060
2061 static ea *process_ea(operand * input, ea * output, int bits,
2062                       int addrbits, int rfield, int32_t rflags)
2063 {
2064     bool forw_ref = !!(input->opflags & OPFLAG_FORWARD);
2065
2066     output->rip = false;
2067
2068     /* REX flags for the rfield operand */
2069     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2070
2071     if (!(REGISTER & ~input->type)) {   /* register direct */
2072         int i;
2073         int32_t f;
2074
2075         if (input->basereg < EXPR_REG_START /* Verify as Register */
2076             || input->basereg >= REG_ENUM_LIMIT)
2077             return NULL;
2078         f = regflag(input);
2079         i = nasm_regvals[input->basereg];
2080
2081         if (REG_EA & ~f)
2082             return NULL;        /* Invalid EA register */
2083
2084         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2085
2086         output->sib_present = false;             /* no SIB necessary */
2087         output->bytes = 0;  /* no offset necessary either */
2088         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2089     } else {                    /* it's a memory reference */
2090         if (input->basereg == -1
2091             && (input->indexreg == -1 || input->scale == 0)) {
2092             /* it's a pure offset */
2093             if (bits == 64 && (~input->type & IP_REL)) {
2094               int scale, index, base;
2095               output->sib_present = true;
2096               scale = 0;
2097               index = 4;
2098               base = 5;
2099               output->sib = (scale << 6) | (index << 3) | base;
2100               output->bytes = 4;
2101               output->modrm = 4 | ((rfield & 7) << 3);
2102               output->rip = false;
2103             } else {
2104               output->sib_present = false;
2105               output->bytes = (addrbits != 16 ? 4 : 2);
2106               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2107               output->rip = bits == 64;
2108             }
2109         } else {                /* it's an indirection */
2110             int i = input->indexreg, b = input->basereg, s = input->scale;
2111             int32_t o = input->offset, seg = input->segment;
2112             int hb = input->hintbase, ht = input->hinttype;
2113             int t;
2114             int it, bt;
2115             int32_t ix, bx;     /* register flags */
2116
2117             if (s == 0)
2118                 i = -1;         /* make this easy, at least */
2119
2120             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2121                 it = nasm_regvals[i];
2122                 ix = nasm_reg_flags[i];
2123             } else {
2124                 it = -1;
2125                 ix = 0;
2126             }
2127
2128             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2129                 bt = nasm_regvals[b];
2130                 bx = nasm_reg_flags[b];
2131             } else {
2132                 bt = -1;
2133                 bx = 0;
2134             }
2135
2136             /* check for a 32/64-bit memory reference... */
2137             if ((ix|bx) & (BITS32|BITS64)) {
2138                 /* it must be a 32/64-bit memory reference. Firstly we have
2139                  * to check that all registers involved are type E/Rxx. */
2140                 int32_t sok = BITS32|BITS64;
2141
2142                 if (it != -1) {
2143                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2144                         sok &= ix;
2145                     else
2146                         return NULL;
2147                 }
2148
2149                 if (bt != -1) {
2150                     if (REG_GPR & ~bx)
2151                         return NULL; /* Invalid register */
2152                     if (~sok & bx & SIZE_MASK)
2153                         return NULL; /* Invalid size */
2154                     sok &= bx;
2155                 }
2156
2157                 /* While we're here, ensure the user didn't specify
2158                    WORD or QWORD. */
2159                 if (input->disp_size == 16 || input->disp_size == 64)
2160                     return NULL;
2161
2162                 if (addrbits == 16 ||
2163                     (addrbits == 32 && !(sok & BITS32)) ||
2164                     (addrbits == 64 && !(sok & BITS64)))
2165                     return NULL;
2166
2167                 /* now reorganize base/index */
2168                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2169                     ((hb == b && ht == EAH_NOTBASE)
2170                      || (hb == i && ht == EAH_MAKEBASE))) {
2171                     /* swap if hints say so */
2172                     t = bt, bt = it, it = t;
2173                     t = bx, bx = ix, ix = t;
2174                 }
2175                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2176                     bt = -1, bx = 0, s++;
2177                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2178                     /* make single reg base, unless hint */
2179                     bt = it, bx = ix, it = -1, ix = 0;
2180                 }
2181                 if (((s == 2 && it != REG_NUM_ESP
2182                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2183                      || s == 5 || s == 9) && bt == -1)
2184                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2185                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2186                     && (input->eaflags & EAF_TIMESTWO))
2187                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2188                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2189                 if (s == 1 && it == REG_NUM_ESP) {
2190                     /* swap ESP into base if scale is 1 */
2191                     t = it, it = bt, bt = t;
2192                     t = ix, ix = bx, bx = t;
2193                 }
2194                 if (it == REG_NUM_ESP
2195                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2196                     return NULL;        /* wrong, for various reasons */
2197
2198                 output->rex |= rexflags(it, ix, REX_X);
2199                 output->rex |= rexflags(bt, bx, REX_B);
2200
2201                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2202                     /* no SIB needed */
2203                     int mod, rm;
2204
2205                     if (bt == -1) {
2206                         rm = 5;
2207                         mod = 0;
2208                     } else {
2209                         rm = (bt & 7);
2210                         if (rm != REG_NUM_EBP && o == 0 &&
2211                                 seg == NO_SEG && !forw_ref &&
2212                                 !(input->eaflags &
2213                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2214                             mod = 0;
2215                         else if (input->eaflags & EAF_BYTEOFFS ||
2216                                  (o >= -128 && o <= 127 && seg == NO_SEG
2217                                   && !forw_ref
2218                                   && !(input->eaflags & EAF_WORDOFFS)))
2219                             mod = 1;
2220                         else
2221                             mod = 2;
2222                     }
2223
2224                     output->sib_present = false;
2225                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2226                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2227                 } else {
2228                     /* we need a SIB */
2229                     int mod, scale, index, base;
2230
2231                     if (it == -1)
2232                         index = 4, s = 1;
2233                     else
2234                         index = (it & 7);
2235
2236                     switch (s) {
2237                     case 1:
2238                         scale = 0;
2239                         break;
2240                     case 2:
2241                         scale = 1;
2242                         break;
2243                     case 4:
2244                         scale = 2;
2245                         break;
2246                     case 8:
2247                         scale = 3;
2248                         break;
2249                     default:   /* then what the smeg is it? */
2250                         return NULL;    /* panic */
2251                     }
2252
2253                     if (bt == -1) {
2254                         base = 5;
2255                         mod = 0;
2256                     } else {
2257                         base = (bt & 7);
2258                         if (base != REG_NUM_EBP && o == 0 &&
2259                                     seg == NO_SEG && !forw_ref &&
2260                                     !(input->eaflags &
2261                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2262                             mod = 0;
2263                         else if (input->eaflags & EAF_BYTEOFFS ||
2264                                  (o >= -128 && o <= 127 && seg == NO_SEG
2265                                   && !forw_ref
2266                                   && !(input->eaflags & EAF_WORDOFFS)))
2267                             mod = 1;
2268                         else
2269                             mod = 2;
2270                     }
2271
2272                     output->sib_present = true;
2273                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2274                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2275                     output->sib = (scale << 6) | (index << 3) | base;
2276                 }
2277             } else {            /* it's 16-bit */
2278                 int mod, rm;
2279
2280                 /* check for 64-bit long mode */
2281                 if (addrbits == 64)
2282                     return NULL;
2283
2284                 /* check all registers are BX, BP, SI or DI */
2285                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2286                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2287                                        && i != R_SI && i != R_DI))
2288                     return NULL;
2289
2290                 /* ensure the user didn't specify DWORD/QWORD */
2291                 if (input->disp_size == 32 || input->disp_size == 64)
2292                     return NULL;
2293
2294                 if (s != 1 && i != -1)
2295                     return NULL;        /* no can do, in 16-bit EA */
2296                 if (b == -1 && i != -1) {
2297                     int tmp = b;
2298                     b = i;
2299                     i = tmp;
2300                 }               /* swap */
2301                 if ((b == R_SI || b == R_DI) && i != -1) {
2302                     int tmp = b;
2303                     b = i;
2304                     i = tmp;
2305                 }
2306                 /* have BX/BP as base, SI/DI index */
2307                 if (b == i)
2308                     return NULL;        /* shouldn't ever happen, in theory */
2309                 if (i != -1 && b != -1 &&
2310                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2311                     return NULL;        /* invalid combinations */
2312                 if (b == -1)    /* pure offset: handled above */
2313                     return NULL;        /* so if it gets to here, panic! */
2314
2315                 rm = -1;
2316                 if (i != -1)
2317                     switch (i * 256 + b) {
2318                     case R_SI * 256 + R_BX:
2319                         rm = 0;
2320                         break;
2321                     case R_DI * 256 + R_BX:
2322                         rm = 1;
2323                         break;
2324                     case R_SI * 256 + R_BP:
2325                         rm = 2;
2326                         break;
2327                     case R_DI * 256 + R_BP:
2328                         rm = 3;
2329                         break;
2330                 } else
2331                     switch (b) {
2332                     case R_SI:
2333                         rm = 4;
2334                         break;
2335                     case R_DI:
2336                         rm = 5;
2337                         break;
2338                     case R_BP:
2339                         rm = 6;
2340                         break;
2341                     case R_BX:
2342                         rm = 7;
2343                         break;
2344                     }
2345                 if (rm == -1)   /* can't happen, in theory */
2346                     return NULL;        /* so panic if it does */
2347
2348                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2349                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2350                     mod = 0;
2351                 else if (input->eaflags & EAF_BYTEOFFS ||
2352                          (o >= -128 && o <= 127 && seg == NO_SEG
2353                           && !forw_ref
2354                           && !(input->eaflags & EAF_WORDOFFS)))
2355                     mod = 1;
2356                 else
2357                     mod = 2;
2358
2359                 output->sib_present = false;    /* no SIB - it's 16-bit */
2360                 output->bytes = mod;    /* bytes of offset needed */
2361                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2362             }
2363         }
2364     }
2365
2366     output->size = 1 + output->sib_present + output->bytes;
2367     return output;
2368 }
2369
2370 static void add_asp(insn *ins, int addrbits)
2371 {
2372     int j, valid;
2373     int defdisp;
2374
2375     valid = (addrbits == 64) ? 64|32 : 32|16;
2376
2377     switch (ins->prefixes[PPS_ASIZE]) {
2378     case P_A16:
2379         valid &= 16;
2380         break;
2381     case P_A32:
2382         valid &= 32;
2383         break;
2384     case P_A64:
2385         valid &= 64;
2386         break;
2387     case P_ASP:
2388         valid &= (addrbits == 32) ? 16 : 32;
2389         break;
2390     default:
2391         break;
2392     }
2393
2394     for (j = 0; j < ins->operands; j++) {
2395         if (!(MEMORY & ~ins->oprs[j].type)) {
2396             int32_t i, b;
2397
2398             /* Verify as Register */
2399             if (ins->oprs[j].indexreg < EXPR_REG_START
2400                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2401                 i = 0;
2402             else
2403                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2404
2405             /* Verify as Register */
2406             if (ins->oprs[j].basereg < EXPR_REG_START
2407                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2408                 b = 0;
2409             else
2410                 b = nasm_reg_flags[ins->oprs[j].basereg];
2411
2412             if (ins->oprs[j].scale == 0)
2413                 i = 0;
2414
2415             if (!i && !b) {
2416                 int ds = ins->oprs[j].disp_size;
2417                 if ((addrbits != 64 && ds > 8) ||
2418                     (addrbits == 64 && ds == 16))
2419                     valid &= ds;
2420             } else {
2421                 if (!(REG16 & ~b))
2422                     valid &= 16;
2423                 if (!(REG32 & ~b))
2424                     valid &= 32;
2425                 if (!(REG64 & ~b))
2426                     valid &= 64;
2427
2428                 if (!(REG16 & ~i))
2429                     valid &= 16;
2430                 if (!(REG32 & ~i))
2431                     valid &= 32;
2432                 if (!(REG64 & ~i))
2433                     valid &= 64;
2434             }
2435         }
2436     }
2437
2438     if (valid & addrbits) {
2439         ins->addr_size = addrbits;
2440     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2441         /* Add an address size prefix */
2442         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2443         ins->prefixes[PPS_ASIZE] = pref;
2444         ins->addr_size = (addrbits == 32) ? 16 : 32;
2445     } else {
2446         /* Impossible... */
2447         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2448         ins->addr_size = addrbits; /* Error recovery */
2449     }
2450
2451     defdisp = ins->addr_size == 16 ? 16 : 32;
2452
2453     for (j = 0; j < ins->operands; j++) {
2454         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2455             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2456             != ins->addr_size) {
2457             /* mem_offs sizes must match the address size; if not,
2458                strip the MEM_OFFS bit and match only EA instructions */
2459             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2460         }
2461     }
2462 }