assemble.c

   1 /* assemble.c   code generation for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the license given in the file "LICENSE"
   6  * distributed in the NASM archive.
   7  *
   8  * the actual codes (C syntax, i.e. octal):
   9  * \0            - terminates the code. (Unless it's a literal of course.)
  10  * \1, \2, \3    - that many literal bytes follow in the code stream
  11  * \4, \6        - the POP/PUSH (respectively) codes for CS, DS, ES, SS
  12  *                 (POP is never used for CS) depending on operand 0
  13  * \5, \7        - the second byte of POP/PUSH codes for FS, GS, depending
  14  *                 on operand 0
  15  * \10..\13      - a literal byte follows in the code stream, to be added
  16  *                 to the register value of operand 0..3
  17  * \14..\17      - a signed byte immediate operand, from operand 0..3
  18  * \20..\23      - a byte immediate operand, from operand 0..3
  19  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  20  * \30..\33      - a word immediate operand, from operand 0..3
  21  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  22  *                 assembly mode or the operand-size override on the operand
  23  * \40..\43      - a long immediate operand, from operand 0..3
  24  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  25  *                 depending on the address size of the instruction.
  26  * \50..\53      - a byte relative operand, from operand 0..3
  27  * \54..\57      - a qword immediate operand, from operand 0..3
  28  * \60..\63      - a word relative operand, from operand 0..3
  29  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  30  *                 assembly mode or the operand-size override on the operand
  31  * \70..\73      - a long relative operand, from operand 0..3
  32  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  33  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  34  *                 field the register value of operand b.
  35  * \140..\143    - an immediate word or signed byte for operand 0..3
  36  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  37  *                  is a signed byte rather than a word.  Opcode byte follows.
  38  * \150..\153    - an immediate dword or signed byte for operand 0..3
  39  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  40  *                  is a signed byte rather than a dword.  Opcode byte follows.
  41  * \160..\163    - this instruction uses DREX rather than REX, with the
  42  *                 OC0 field set to 0, and the dest field taken from
  43  *                 operand 0..3.
  44  * \164..\167    - this instruction uses DREX rather than REX, with the
  45  *                 OC0 field set to 1, and the dest field taken from
  46  *                 operand 0..3.
  47  * \171          - placement of DREX suffix in the absence of an EA
  48  * \172\ab       - the register number from operand a in bits 7..4, with
  49  *                 the 4-bit immediate from operand b in bits 3..0.
  50  * \173\xab      - the register number from operand a in bits 7..4, with
  51  *                 the value b in bits 3..0.
  52  * \174\a        - the register number from operand a in bits 7..4, and
  53  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  54  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  55  *                 field equal to digit b.
  56  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  57  *                 is not equal to the truncated and sign-extended 32-bit
  58  *                 operand; used for 32-bit immediates in 64-bit mode.
  59  * \260..\263    - this instruction uses VEX rather than REX, with the
  60  *                 V field taken from operand 0..3.
  61  * \270          - this instruction uses VEX rather than REX, with the
  62  *                 V field set to 1111b.
  63  *
  64  * VEX prefixes are followed by the sequence:
  65  * \mm\wlp         where mm is the M field; and wlp is:
  66  *                 00 0ww lpp
  67  *                 [w0] ww = 0 for W = 0
  68  *                 [w1] ww = 1 for W = 1
  69  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  70  *                 [ww] ww = 3 for W used as REX.W
  71  *
  72  *
  73  * \274..\277    - a signed byte immediate operand, from operand 0..3,
  74  *                 which is to be extended to the operand size.
  75  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  76  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  77  * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
  78  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  79  * \314          - (disassembler only) invalid with REX.B
  80  * \315          - (disassembler only) invalid with REX.X
  81  * \316          - (disassembler only) invalid with REX.R
  82  * \317          - (disassembler only) invalid with REX.W
  83  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
  84  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
  85  * \322          - indicates that this instruction is only valid when the
  86  *                 operand size is the default (instruction to disassembler,
  87  *                 generates no code in the assembler)
  88  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
  89  * \324          - indicates 64-bit operand size requiring REX prefix.
  90  * \330          - a literal byte follows in the code stream, to be added
  91  *                 to the condition code value of the instruction.
  92  * \331          - instruction not valid with REP prefix.  Hint for
  93  *                 disassembler only; for SSE instructions.
  94  * \332          - REP prefix (0xF2 byte) used as opcode extension.
  95  * \333          - REP prefix (0xF3 byte) used as opcode extension.
  96  * \334          - LOCK prefix used instead of REX.R
  97  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
   98  * \336          - force a REP(E) prefix (0xF3) even if not specified.
   99  * \337          - force a REPNE prefix (0xF2) even if not specified.
 100  *                 \336-\337 are still listed as prefixes in the disassembler.
 101  * \340          - reserve <operand 0> bytes of uninitialized storage.
 102  *                 Operand 0 had better be a segmentless constant.
 103  * \360          - no SSE prefix (== \364\331)
 104  * \361          - 66 SSE prefix (== \366\331)
 105  * \362          - F2 SSE prefix (== \364\332)
 106  * \363          - F3 SSE prefix (== \364\333)
 107  * \364          - operand-size prefix (0x66) not permitted
 108  * \365          - address-size prefix (0x67) not permitted
 109  * \366          - operand-size prefix (0x66) used as opcode extension
 110  * \367          - address-size prefix (0x67) used as opcode extension
 111  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 112  *                 370 is used for Jcc, 371 is used for JMP.
 113  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 114  *                 used for conditional jump over longer jump
 115  */
 116
 117 #include "compiler.h"
 118
 119 #include <stdio.h>
 120 #include <string.h>
 121 #include <inttypes.h>
 122
 123 #include "nasm.h"
 124 #include "nasmlib.h"
 125 #include "assemble.h"
 126 #include "insns.h"
 127 #include "tables.h"
 128
 129 /* Initialized to zero by the C standard */
 130 static const uint8_t const_zero_buf[256];
 131
 132 typedef struct {
 133     int sib_present;                 /* is a SIB byte necessary? */
 134     int bytes;                       /* # of bytes of offset needed */
 135     int size;                        /* lazy - this is sib+bytes+1 */
 136     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 137 } ea;
 138
 139 static uint32_t cpu;            /* cpu level received from nasm.c */
 140 static efunc errfunc;
 141 static struct ofmt *outfmt;
 142 static ListGen *list;
 143
 144 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 145 static void gencode(int32_t segment, int64_t offset, int bits,
 146                     insn * ins, const struct itemplate *temp,
 147                     int64_t insn_end);
 148 static int matches(const struct itemplate *, insn *, int bits);
 149 static int32_t regflag(const operand *);
 150 static int32_t regval(const operand *);
 151 static int rexflags(int, int32_t, int);
 152 static int op_rexflags(const operand *, int);
 153 static ea *process_ea(operand *, ea *, int, int, int, int32_t);
 154 static void add_asp(insn *, int);
 155
 156 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 157 {
 158     return ins->prefixes[pos] == prefix;
 159 }
 160
 161 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 162 {
 163     if (ins->prefixes[pos])
 164         errfunc(ERR_NONFATAL, "invalid %s prefix",
 165                 prefix_name(ins->prefixes[pos]));
 166 }
 167
 168 static const char *size_name(int size)
 169 {
 170     switch (size) {
 171     case 1:
 172         return "byte";
 173     case 2:
 174         return "word";
 175     case 4:
 176         return "dword";
 177     case 8:
 178         return "qword";
 179     case 10:
 180         return "tword";
 181     case 16:
 182         return "oword";
 183     case 32:
 184         return "yword";
 185     default:
 186         return "???";
 187     }
 188 }
 189
 190 static void warn_overflow(int size, const struct operand *o)
 191 {
 192     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 193         int64_t lim = ((int64_t)1 << (size*8))-1;
 194         int64_t data = o->offset;
 195
 196         if (data < ~lim || data > lim)
 197             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 198                     "%s data exceeds bounds", size_name(size));
 199     }
 200 }
 201 /*
 202  * This routine wrappers the real output format's output routine,
 203  * in order to pass a copy of the data off to the listing file
 204  * generator at the same time.
 205  */
 206 static void out(int64_t offset, int32_t segto, const void *data,
 207                 enum out_type type, uint64_t size,
 208                 int32_t segment, int32_t wrt)
 209 {
 210     static int32_t lineno = 0;     /* static!!! */
 211     static char *lnfname = NULL;
 212     uint8_t p[8];
 213
 214     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 215         /*
 216          * This is a non-relocated address, and we're going to
 217          * convert it into RAWDATA format.
 218          */
 219         uint8_t *q = p;
 220
 221         if (size > 8) {
 222             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 223             return;
 224         }
 225
 226         WRITEADDR(q, *(int64_t *)data, size);
 227         data = p;
 228         type = OUT_RAWDATA;
 229     }
 230
 231     list->output(offset, data, type, size);
 232
 233     /*
 234      * this call to src_get determines when we call the
 235      * debug-format-specific "linenum" function
 236      * it updates lineno and lnfname to the current values
 237      * returning 0 if "same as last time", -2 if lnfname
 238      * changed, and the amount by which lineno changed,
 239      * if it did. thus, these variables must be static
 240      */
 241
 242     if (src_get(&lineno, &lnfname)) {
 243         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 244     }
 245
 246     outfmt->output(segto, data, type, size, segment, wrt);
 247 }
 248
 249 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 250                      insn * ins, const uint8_t *code)
 251 {
 252     int64_t isize;
 253     uint8_t c = code[0];
 254
 255     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 256         return false;
 257     if (!optimizing)
 258         return false;
 259     if (optimizing < 0 && c == 0371)
 260         return false;
 261
 262     isize = calcsize(segment, offset, bits, ins, code);
 263     if (ins->oprs[0].segment != segment)
 264         return false;
 265
 266     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 267     return (isize >= -128 && isize <= 127); /* is it byte size? */
 268 }
 269
 270 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 271               insn * instruction, struct ofmt *output, efunc error,
 272               ListGen * listgen)
 273 {
 274     const struct itemplate *temp;
 275     int j;
 276     int size_prob;
 277     int64_t insn_end;
 278     int32_t itimes;
 279     int64_t start = offset;
 280     int64_t wsize = 0;             /* size for DB etc. */
 281
 282     errfunc = error;            /* to pass to other functions */
 283     cpu = cp;
 284     outfmt = output;            /* likewise */
 285     list = listgen;             /* and again */
 286
 287     switch (instruction->opcode) {
 288     case -1:
 289         return 0;
 290     case I_DB:
 291         wsize = 1;
 292         break;
 293     case I_DW:
 294         wsize = 2;
 295         break;
 296     case I_DD:
 297         wsize = 4;
 298         break;
 299     case I_DQ:
 300         wsize = 8;
 301         break;
 302     case I_DT:
 303         wsize = 10;
 304         break;
 305     case I_DO:
 306         wsize = 16;
 307         break;
 308     case I_DY:
 309         wsize = 32;
 310         break;
 311     default:
 312         break;
 313     }
 314
 315     if (wsize) {
 316         extop *e;
 317         int32_t t = instruction->times;
 318         if (t < 0)
 319             errfunc(ERR_PANIC,
 320                     "instruction->times < 0 (%ld) in assemble()", t);
 321
 322         while (t--) {           /* repeat TIMES times */
 323             for (e = instruction->eops; e; e = e->next) {
 324                 if (e->type == EOT_DB_NUMBER) {
 325                     if (wsize == 1) {
 326                         if (e->segment != NO_SEG)
 327                             errfunc(ERR_NONFATAL,
 328                                     "one-byte relocation attempted");
 329                         else {
 330                             uint8_t out_byte = e->offset;
 331                             out(offset, segment, &out_byte,
 332                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 333                         }
 334                     } else if (wsize > 8) {
 335                         errfunc(ERR_NONFATAL,
 336                                 "integer supplied to a DT, DO or DY"
 337                                 " instruction");
 338                     } else
 339                         out(offset, segment, &e->offset,
 340                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 341                     offset += wsize;
 342                 } else if (e->type == EOT_DB_STRING ||
 343                            e->type == EOT_DB_STRING_FREE) {
 344                     int align;
 345
 346                     out(offset, segment, e->stringval,
 347                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 348                     align = e->stringlen % wsize;
 349
 350                     if (align) {
 351                         align = wsize - align;
 352                         out(offset, segment, const_zero_buf,
 353                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 354                     }
 355                     offset += e->stringlen + align;
 356                 }
 357             }
 358             if (t > 0 && t == instruction->times - 1) {
 359                 /*
 360                  * Dummy call to list->output to give the offset to the
 361                  * listing module.
 362                  */
 363                 list->output(offset, NULL, OUT_RAWDATA, 0);
 364                 list->uplevel(LIST_TIMES);
 365             }
 366         }
 367         if (instruction->times > 1)
 368             list->downlevel(LIST_TIMES);
 369         return offset - start;
 370     }
 371
 372     if (instruction->opcode == I_INCBIN) {
 373         const char *fname = instruction->eops->stringval;
 374         FILE *fp;
 375
 376         fp = fopen(fname, "rb");
 377         if (!fp) {
 378             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 379                   fname);
 380         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 381             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 382                   fname);
 383         } else {
 384             static char buf[4096];
 385             size_t t = instruction->times;
 386             size_t base = 0;
 387             size_t len;
 388
 389             len = ftell(fp);
 390             if (instruction->eops->next) {
 391                 base = instruction->eops->next->offset;
 392                 len -= base;
 393                 if (instruction->eops->next->next &&
 394                     len > (size_t)instruction->eops->next->next->offset)
 395                     len = (size_t)instruction->eops->next->next->offset;
 396             }
 397             /*
 398              * Dummy call to list->output to give the offset to the
 399              * listing module.
 400              */
 401             list->output(offset, NULL, OUT_RAWDATA, 0);
 402             list->uplevel(LIST_INCBIN);
 403             while (t--) {
 404                 size_t l;
 405
 406                 fseek(fp, base, SEEK_SET);
 407                 l = len;
 408                 while (l > 0) {
 409                     int32_t m =
 410                         fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
 411                               fp);
 412                     if (!m) {
 413                         /*
 414                          * This shouldn't happen unless the file
 415                          * actually changes while we are reading
 416                          * it.
 417                          */
 418                         error(ERR_NONFATAL,
 419                               "`incbin': unexpected EOF while"
 420                               " reading file `%s'", fname);
 421                         t = 0;  /* Try to exit cleanly */
 422                         break;
 423                     }
 424                     out(offset, segment, buf, OUT_RAWDATA, m,
 425                         NO_SEG, NO_SEG);
 426                     l -= m;
 427                 }
 428             }
 429             list->downlevel(LIST_INCBIN);
 430             if (instruction->times > 1) {
 431                 /*
 432                  * Dummy call to list->output to give the offset to the
 433                  * listing module.
 434                  */
 435                 list->output(offset, NULL, OUT_RAWDATA, 0);
 436                 list->uplevel(LIST_TIMES);
 437                 list->downlevel(LIST_TIMES);
 438             }
 439             fclose(fp);
 440             return instruction->times * len;
 441         }
 442         return 0;               /* if we're here, there's an error */
 443     }
 444
 445     /* Check to see if we need an address-size prefix */
 446     add_asp(instruction, bits);
 447
 448     size_prob = false;
 449
 450     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 451         int m = matches(temp, instruction, bits);
 452         if (m == 100 ||
 453             (m == 99 && jmp_match(segment, offset, bits,
 454                                   instruction, temp->code))) {
 455             /* Matches! */
 456             int64_t insn_size = calcsize(segment, offset, bits,
 457                                       instruction, temp->code);
 458             itimes = instruction->times;
 459             if (insn_size < 0)  /* shouldn't be, on pass two */
 460                 error(ERR_PANIC, "errors made it through from pass one");
 461             else
 462                 while (itimes--) {
 463                     for (j = 0; j < MAXPREFIX; j++) {
 464                         uint8_t c = 0;
 465                         switch (instruction->prefixes[j]) {
 466                         case P_LOCK:
 467                             c = 0xF0;
 468                             break;
 469                         case P_REPNE:
 470                         case P_REPNZ:
 471                             c = 0xF2;
 472                             break;
 473                         case P_REPE:
 474                         case P_REPZ:
 475                         case P_REP:
 476                             c = 0xF3;
 477                             break;
 478                         case R_CS:
 479                             if (bits == 64) {
 480                                 error(ERR_WARNING | ERR_PASS2,
 481                                       "cs segment base generated, but will be ignored in 64-bit mode");
 482                             }
 483                             c = 0x2E;
 484                             break;
 485                         case R_DS:
 486                             if (bits == 64) {
 487                                 error(ERR_WARNING | ERR_PASS2,
 488                                       "ds segment base generated, but will be ignored in 64-bit mode");
 489                             }
 490                             c = 0x3E;
 491                             break;
 492                         case R_ES:
 493                            if (bits == 64) {
 494                                 error(ERR_WARNING | ERR_PASS2,
 495                                       "es segment base generated, but will be ignored in 64-bit mode");
 496                            }
 497                             c = 0x26;
 498                             break;
 499                         case R_FS:
 500                             c = 0x64;
 501                             break;
 502                         case R_GS:
 503                             c = 0x65;
 504                             break;
 505                         case R_SS:
 506                             if (bits == 64) {
 507                                 error(ERR_WARNING | ERR_PASS2,
 508                                       "ss segment base generated, but will be ignored in 64-bit mode");
 509                             }
 510                             c = 0x36;
 511                             break;
 512                         case R_SEGR6:
 513                         case R_SEGR7:
 514                             error(ERR_NONFATAL,
 515                                   "segr6 and segr7 cannot be used as prefixes");
 516                             break;
 517                         case P_A16:
 518                             if (bits == 64) {
 519                                 error(ERR_NONFATAL,
 520                                       "16-bit addressing is not supported "
 521                                       "in 64-bit mode");
 522                             } else if (bits != 16)
 523                                 c = 0x67;
 524                             break;
 525                         case P_A32:
 526                             if (bits != 32)
 527                                 c = 0x67;
 528                             break;
 529                         case P_A64:
 530                             if (bits != 64) {
 531                                 error(ERR_NONFATAL,
 532                                       "64-bit addressing is only supported "
 533                                       "in 64-bit mode");
 534                             }
 535                             break;
 536                         case P_ASP:
 537                             c = 0x67;
 538                             break;
 539                         case P_O16:
 540                             if (bits != 16)
 541                                 c = 0x66;
 542                             break;
 543                         case P_O32:
 544                             if (bits == 16)
 545                                 c = 0x66;
 546                             break;
 547                         case P_O64:
 548                             /* REX.W */
 549                             break;
 550                         case P_OSP:
 551                             c = 0x66;
 552                             break;
 553                         case P_none:
 554                             break;
 555                         default:
 556                             error(ERR_PANIC, "invalid instruction prefix");
 557                         }
 558                         if (c != 0) {
 559                             out(offset, segment, &c, OUT_RAWDATA, 1,
 560                                 NO_SEG, NO_SEG);
 561                             offset++;
 562                         }
 563                     }
 564                     insn_end = offset + insn_size;
 565                     gencode(segment, offset, bits, instruction,
 566                             temp, insn_end);
 567                     offset += insn_size;
 568                     if (itimes > 0 && itimes == instruction->times - 1) {
 569                         /*
 570                          * Dummy call to list->output to give the offset to the
 571                          * listing module.
 572                          */
 573                         list->output(offset, NULL, OUT_RAWDATA, 0);
 574                         list->uplevel(LIST_TIMES);
 575                     }
 576                 }
 577             if (instruction->times > 1)
 578                 list->downlevel(LIST_TIMES);
 579             return offset - start;
 580         } else if (m > 0 && m > size_prob) {
 581             size_prob = m;
 582         }
 583     }
 584
 585     if (temp->opcode == -1) {   /* didn't match any instruction */
 586         switch (size_prob) {
 587         case 1:
 588             error(ERR_NONFATAL, "operation size not specified");
 589             break;
 590         case 2:
 591             error(ERR_NONFATAL, "mismatch in operand sizes");
 592             break;
 593         case 3:
 594             error(ERR_NONFATAL, "no instruction for this cpu level");
 595             break;
 596         case 4:
 597             error(ERR_NONFATAL, "instruction not supported in 64-bit mode");
 598             break;
 599         default:
 600             error(ERR_NONFATAL,
 601                   "invalid combination of opcode and operands");
 602             break;
 603         }
 604     }
 605     return 0;
 606 }
 607
 608 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 609                insn * instruction, efunc error)
 610 {
 611     const struct itemplate *temp;
 612
 613     errfunc = error;            /* to pass to other functions */
 614     cpu = cp;
 615
 616     if (instruction->opcode == -1)
 617         return 0;
 618
 619     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 620         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 621         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 622         instruction->opcode == I_DY) {
 623         extop *e;
 624         int32_t isize, osize, wsize = 0;   /* placate gcc */
 625
 626         isize = 0;
 627         switch (instruction->opcode) {
 628         case I_DB:
 629             wsize = 1;
 630             break;
 631         case I_DW:
 632             wsize = 2;
 633             break;
 634         case I_DD:
 635             wsize = 4;
 636             break;
 637         case I_DQ:
 638             wsize = 8;
 639             break;
 640         case I_DT:
 641             wsize = 10;
 642             break;
 643         case I_DO:
 644             wsize = 16;
 645             break;
 646         case I_DY:
 647             wsize = 32;
 648             break;
 649         default:
 650             break;
 651         }
 652
 653         for (e = instruction->eops; e; e = e->next) {
 654             int32_t align;
 655
 656             osize = 0;
 657             if (e->type == EOT_DB_NUMBER)
 658                 osize = 1;
 659             else if (e->type == EOT_DB_STRING ||
 660                      e->type == EOT_DB_STRING_FREE)
 661                 osize = e->stringlen;
 662
 663             align = (-osize) % wsize;
 664             if (align < 0)
 665                 align += wsize;
 666             isize += osize + align;
 667         }
 668         return isize * instruction->times;
 669     }
 670
 671     if (instruction->opcode == I_INCBIN) {
 672         const char *fname = instruction->eops->stringval;
 673         FILE *fp;
 674         size_t len;
 675
 676         fp = fopen(fname, "rb");
 677         if (!fp)
 678             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 679                   fname);
 680         else if (fseek(fp, 0L, SEEK_END) < 0)
 681             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 682                   fname);
 683         else {
 684             len = ftell(fp);
 685             fclose(fp);
 686             if (instruction->eops->next) {
 687                 len -= instruction->eops->next->offset;
 688                 if (instruction->eops->next->next &&
 689                     len > (size_t)instruction->eops->next->next->offset) {
 690                     len = (size_t)instruction->eops->next->next->offset;
 691                 }
 692             }
 693             return instruction->times * len;
 694         }
 695         return 0;               /* if we're here, there's an error */
 696     }
 697
 698     /* Check to see if we need an address-size prefix */
 699     add_asp(instruction, bits);
 700
 701     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 702         int m = matches(temp, instruction, bits);
 703         if (m == 100 ||
 704             (m == 99 && jmp_match(segment, offset, bits,
 705                                   instruction, temp->code))) {
 706             /* we've matched an instruction. */
 707             int64_t isize;
 708             const uint8_t *codes = temp->code;
 709             int j;
 710
 711             isize = calcsize(segment, offset, bits, instruction, codes);
 712             if (isize < 0)
 713                 return -1;
 714             for (j = 0; j < MAXPREFIX; j++) {
 715                 switch (instruction->prefixes[j]) {
 716                 case P_A16:
 717                     if (bits != 16)
 718                         isize++;
 719                     break;
 720                 case P_A32:
 721                     if (bits != 32)
 722                         isize++;
 723                     break;
 724                 case P_O16:
 725                     if (bits != 16)
 726                         isize++;
 727                     break;
 728                 case P_O32:
 729                     if (bits == 16)
 730                         isize++;
 731                     break;
 732                 case P_A64:
 733                 case P_O64:
 734                 case P_none:
 735                     break;
 736                 default:
 737                     isize++;
 738                     break;
 739                 }
 740             }
 741             return isize * instruction->times;
 742         }
 743     }
 744     return -1;                  /* didn't match any instruction */
 745 }
 746
 747 static bool possible_sbyte(operand *o)
 748 {
 749     return !(o->opflags & OPFLAG_FORWARD) &&
 750         optimizing >= 0 && !(o->type & STRICT);
 751 }
 752
 753 /* check that opn[op]  is a signed byte of size 16 or 32 */
 754 static bool is_sbyte16(operand *o)
 755 {
 756     int16_t v;
 757
 758     if (!possible_sbyte(o))
 759         return false;
 760
 761     v = o->offset;
 762     return v >= -128 && v <= 127;
 763 }
 764
 765 static bool is_sbyte32(operand *o)
 766 {
 767     int32_t v;
 768
 769     if (!possible_sbyte(o))
 770         return false;
 771
 772     v = o->offset;
 773     return v >= -128 && v <= 127;
 774 }
 775
 776 /* check that opn[op] is a signed byte of size 32; warn if this is not
 777    the original value when extended to 64 bits */
 778 static bool is_sbyte64(operand *o)
 779 {
 780     int64_t v64;
 781     int32_t v;
 782
 783     if (!(o->wrt == NO_SEG && o->segment == NO_SEG))
 784         return false;           /* Not a pure immediate */
 785
 786     v64 = o->offset;
 787     v = (int32_t)v64;
 788
 789     if (v64 != v)
 790         errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 791                 "signed dword immediate exceeds bounds");
 792
 793     /* dead in the water on forward reference or External */
 794     if (!possible_sbyte(o))
 795         return false;
 796
 797     v = o->offset;
 798     return v >= -128 && v <= 127;
 799 }
 800 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 801                         insn * ins, const uint8_t *codes)
 802 {
 803     int64_t length = 0;
 804     uint8_t c;
 805     int rex_mask = ~0;
 806     struct operand *opx;
 807
 808     ins->rex = 0;               /* Ensure REX is reset */
 809
 810     if (ins->prefixes[PPS_OSIZE] == P_O64)
 811         ins->rex |= REX_W;
 812
 813     (void)segment;              /* Don't warn that this parameter is unused */
 814     (void)offset;               /* Don't warn that this parameter is unused */
 815
 816     while (*codes) {
 817         c = *codes++;
 818         opx = &ins->oprs[c & 3];
 819         switch (c) {
 820         case 01:
 821         case 02:
 822         case 03:
 823             codes += c, length += c;
 824             break;
 825         case 04:
 826         case 05:
 827         case 06:
 828         case 07:
 829             length++;
 830             break;
 831         case 010:
 832         case 011:
 833         case 012:
 834         case 013:
 835             ins->rex |=
 836                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 837             codes++, length++;
 838             break;
 839         case 014:
 840         case 015:
 841         case 016:
 842         case 017:
 843             length++;
 844             break;
 845         case 020:
 846         case 021:
 847         case 022:
 848         case 023:
 849             length++;
 850             break;
 851         case 024:
 852         case 025:
 853         case 026:
 854         case 027:
 855             length++;
 856             break;
 857         case 030:
 858         case 031:
 859         case 032:
 860         case 033:
 861             length += 2;
 862             break;
 863         case 034:
 864         case 035:
 865         case 036:
 866         case 037:
 867             if (opx->type & (BITS16 | BITS32 | BITS64))
 868                 length += (opx->type & BITS16) ? 2 : 4;
 869             else
 870                 length += (bits == 16) ? 2 : 4;
 871             break;
 872         case 040:
 873         case 041:
 874         case 042:
 875         case 043:
 876             length += 4;
 877             break;
 878         case 044:
 879         case 045:
 880         case 046:
 881         case 047:
 882             length += ins->addr_size >> 3;
 883             break;
 884         case 050:
 885         case 051:
 886         case 052:
 887         case 053:
 888             length++;
 889             break;
 890         case 054:
 891         case 055:
 892         case 056:
 893         case 057:
 894             length += 8; /* MOV reg64/imm */
 895             break;
 896         case 060:
 897         case 061:
 898         case 062:
 899         case 063:
 900             length += 2;
 901             break;
 902         case 064:
 903         case 065:
 904         case 066:
 905         case 067:
 906             if (opx->type & (BITS16 | BITS32 | BITS64))
 907                 length += (opx->type & BITS16) ? 2 : 4;
 908             else
 909                 length += (bits == 16) ? 2 : 4;
 910             break;
 911         case 070:
 912         case 071:
 913         case 072:
 914         case 073:
 915             length += 4;
 916             break;
 917         case 074:
 918         case 075:
 919         case 076:
 920         case 077:
 921             length += 2;
 922             break;
 923         case 0140:
 924         case 0141:
 925         case 0142:
 926         case 0143:
 927             length += is_sbyte16(opx) ? 1 : 2;
 928             break;
 929         case 0144:
 930         case 0145:
 931         case 0146:
 932         case 0147:
 933             codes++;
 934             length++;
 935             break;
 936         case 0150:
 937         case 0151:
 938         case 0152:
 939         case 0153:
 940             length += is_sbyte32(opx) ? 1 : 4;
 941             break;
 942         case 0154:
 943         case 0155:
 944         case 0156:
 945         case 0157:
 946             codes++;
 947             length++;
 948             break;
 949         case 0160:
 950         case 0161:
 951         case 0162:
 952         case 0163:
 953             length++;
 954             ins->rex |= REX_D;
 955             ins->drexdst = regval(opx);
 956             break;
 957         case 0164:
 958         case 0165:
 959         case 0166:
 960         case 0167:
 961             length++;
 962             ins->rex |= REX_D|REX_OC;
 963             ins->drexdst = regval(opx);
 964             break;
 965         case 0171:
 966             break;
 967         case 0172:
 968         case 0173:
 969         case 0174:
 970             codes++;
 971             length++;
 972             break;
 973         case 0250:
 974         case 0251:
 975         case 0252:
 976         case 0253:
 977             length += is_sbyte64(opx) ? 1 : 4;
 978             break;
 979         case 0260:
 980         case 0261:
 981         case 0262:
 982         case 0263:
 983             ins->rex |= REX_V;
 984             ins->drexdst = regval(opx);
 985             ins->vex_m = *codes++;
 986             ins->vex_wlp = *codes++;
 987             break;
 988         case 0270:
 989             ins->rex |= REX_V;
 990             ins->drexdst = 0;
 991             ins->vex_m = *codes++;
 992             ins->vex_wlp = *codes++;
 993             break;
 994         case 0274:
 995         case 0275:
 996         case 0276:
 997         case 0277:
 998             length++;
 999             break;
1000         case 0300:
1001         case 0301:
1002         case 0302:
1003         case 0303:
1004             break;
1005         case 0310:
1006             if (bits == 64)
1007                 return -1;
1008             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
1009             break;
1010         case 0311:
1011             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
1012             break;
1013         case 0312:
1014             break;
1015         case 0313:
1016             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
1017                 has_prefix(ins, PPS_ASIZE, P_A32))
1018                 return -1;
1019             break;
1020         case 0314:
1021         case 0315:
1022         case 0316:
1023         case 0317:
1024             break;
1025         case 0320:
1026             length += (bits != 16);
1027             break;
1028         case 0321:
1029             length += (bits == 16);
1030             break;
1031         case 0322:
1032             break;
1033         case 0323:
1034             rex_mask &= ~REX_W;
1035             break;
1036         case 0324:
1037             ins->rex |= REX_W;
1038             break;
1039         case 0330:
1040             codes++, length++;
1041             break;
1042         case 0331:
1043             break;
1044         case 0332:
1045         case 0333:
1046             length++;
1047             break;
1048         case 0334:
1049             ins->rex |= REX_L;
1050             break;
1051         case 0335:
1052             break;
1053         case 0336:
1054             if (!ins->prefixes[PPS_LREP])
1055                 ins->prefixes[PPS_LREP] = P_REP;
1056             break;
1057         case 0337:
1058             if (!ins->prefixes[PPS_LREP])
1059                 ins->prefixes[PPS_LREP] = P_REPNE;
1060             break;
1061         case 0340:
1062             if (ins->oprs[0].segment != NO_SEG)
1063                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1064                         " quantity of BSS space");
1065             else
1066                 length += ins->oprs[0].offset;
1067             break;
1068         case 0360:
1069             break;
1070         case 0361:
1071         case 0362:
1072         case 0363:
1073             length++;
1074             break;
1075         case 0364:
1076         case 0365:
1077             break;
1078         case 0366:
1079         case 0367:
1080             length++;
1081             break;
1082         case 0370:
1083         case 0371:
1084         case 0372:
1085             break;
1086         case 0373:
1087             length++;
1088             break;
1089         default:               /* can't do it by 'case' statements */
1090             if (c >= 0100 && c <= 0277) {       /* it's an EA */
1091                 ea ea_data;
1092                 int rfield;
1093                 int32_t rflags;
1094                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1095
1096                 if (c <= 0177) {
1097                     /* pick rfield from operand b */
1098                     rflags = regflag(&ins->oprs[c & 7]);
1099                     rfield = nasm_regvals[ins->oprs[c & 7].basereg];
1100                 } else {
1101                     rflags = 0;
1102                     rfield = c & 7;
1103                 }
1104
1105                 if (!process_ea
1106                     (&ins->oprs[(c >> 3) & 7], &ea_data, bits,
1107                      ins->addr_size, rfield, rflags)) {
1108                     errfunc(ERR_NONFATAL, "invalid effective address");
1109                     return -1;
1110                 } else {
1111                     ins->rex |= ea_data.rex;
1112                     length += ea_data.size;
1113                 }
1114             } else {
1115                 errfunc(ERR_PANIC, "internal instruction table corrupt"
1116                         ": instruction code 0x%02X given", c);
1117             }
1118         }
1119     }
1120
1121     ins->rex &= rex_mask;
1122
1123     if (ins->rex & REX_V) {
1124         int bad32 = REX_R|REX_W|REX_X|REX_B;
1125
1126         if (ins->rex & REX_H) {
1127             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1128             return -1;
1129         }
1130         switch (ins->vex_wlp & 030) {
1131         case 000:
1132         case 020:
1133             ins->rex &= ~REX_W;
1134             break;
1135         case 010:
1136             ins->rex |= REX_W;
1137             bad32 &= ~REX_W;
1138             break;
1139         case 030:
1140             /* Follow REX_W */
1141             break;
1142         }
1143
1144         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1145             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1146             return -1;
1147         }
1148         if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1149             length += 3;
1150         else
1151             length += 2;
1152     } else if (ins->rex & REX_D) {
1153         if (ins->rex & REX_H) {
1154             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1155             return -1;
1156         }
1157         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1158                            ins->drexdst > 7)) {
1159             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1160             return -1;
1161         }
1162         length++;
1163     } else if (ins->rex & REX_REAL) {
1164         if (ins->rex & REX_H) {
1165             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1166             return -1;
1167         } else if (bits == 64) {
1168             length++;
1169         } else if ((ins->rex & REX_L) &&
1170                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1171                    cpu >= IF_X86_64) {
1172             /* LOCK-as-REX.R */
1173             assert_no_prefix(ins, PPS_LREP);
1174             length++;
1175         } else {
1176             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1177             return -1;
1178         }
1179     }
1180
1181     return length;
1182 }
1183
/*
 * EMIT_REX: in 64-bit mode, when the instruction has REX_REAL bits set and
 * is not using a DREX (REX_D) or VEX (REX_V) encoding, write the REX prefix
 * byte and advance `offset' by one.  ins->rex is cleared afterwards, so a
 * later EMIT_REX() for the same instruction emits nothing.
 *
 * Uses `ins', `bits', `offset' and `segment' from the enclosing scope.
 * Wrapped in do { } while (0) so that "EMIT_REX();" behaves as a single
 * statement, including as the body of an if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&     \
            (bits == 64)) {                                             \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1191
1192 static void gencode(int32_t segment, int64_t offset, int bits,
1193                     insn * ins, const struct itemplate *temp,
1194                     int64_t insn_end)
1195 {
1196     static char condval[] = {   /* conditional opcodes */
1197         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1198         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1199         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1200     };
1201     uint8_t c;
1202     uint8_t bytes[4];
1203     int64_t size;
1204     int64_t data;
1205     struct operand *opx;
1206     const uint8_t *codes = temp->code;
1207
1208     while (*codes) {
1209         c = *codes++;
1210         opx = &ins->oprs[c & 3];
1211         switch (c) {
1212         case 01:
1213         case 02:
1214         case 03:
1215             EMIT_REX();
1216             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1217             codes += c;
1218             offset += c;
1219             break;
1220
1221         case 04:
1222         case 06:
1223             switch (ins->oprs[0].basereg) {
1224             case R_CS:
1225                 bytes[0] = 0x0E + (c == 0x04 ? 1 : 0);
1226                 break;
1227             case R_DS:
1228                 bytes[0] = 0x1E + (c == 0x04 ? 1 : 0);
1229                 break;
1230             case R_ES:
1231                 bytes[0] = 0x06 + (c == 0x04 ? 1 : 0);
1232                 break;
1233             case R_SS:
1234                 bytes[0] = 0x16 + (c == 0x04 ? 1 : 0);
1235                 break;
1236             default:
1237                 errfunc(ERR_PANIC,
1238                         "bizarre 8086 segment register received");
1239             }
1240             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1241             offset++;
1242             break;
1243
1244         case 05:
1245         case 07:
1246             switch (ins->oprs[0].basereg) {
1247             case R_FS:
1248                 bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0);
1249                 break;
1250             case R_GS:
1251                 bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0);
1252                 break;
1253             default:
1254                 errfunc(ERR_PANIC,
1255                         "bizarre 386 segment register received");
1256             }
1257             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1258             offset++;
1259             break;
1260
1261         case 010:
1262         case 011:
1263         case 012:
1264         case 013:
1265             EMIT_REX();
1266             bytes[0] = *codes++ + ((regval(opx)) & 7);
1267             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1268             offset += 1;
1269             break;
1270
1271         case 014:
1272         case 015:
1273         case 016:
1274         case 017:
1275             /* The test for BITS8 and SBYTE here is intended to avoid
1276                warning on optimizer actions due to SBYTE, while still
1277                warn on explicit BYTE directives.  Also warn, obviously,
1278                if the optimizer isn't enabled. */
1279             if (((opx->type & BITS8) ||
1280                  !(opx->type & temp->opd[c & 3] & BYTENESS)) &&
1281                 (opx->offset < -128 || opx->offset > 127)) {
1282                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1283                         "signed byte value exceeds bounds");
1284             }
1285             if (opx->segment != NO_SEG) {
1286                 data = opx->offset;
1287                 out(offset, segment, &data, OUT_ADDRESS, 1,
1288                     opx->segment, opx->wrt);
1289             } else {
1290                 bytes[0] = opx->offset;
1291                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1292                     NO_SEG);
1293             }
1294             offset += 1;
1295             break;
1296
1297         case 020:
1298         case 021:
1299         case 022:
1300         case 023:
1301             if (opx->offset < -256 || opx->offset > 255) {
1302                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1303                         "byte value exceeds bounds");
1304             }
1305             if (opx->segment != NO_SEG) {
1306                 data = opx->offset;
1307                 out(offset, segment, &data, OUT_ADDRESS, 1,
1308                     opx->segment, opx->wrt);
1309             } else {
1310                 bytes[0] = opx->offset;
1311                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1312                     NO_SEG);
1313             }
1314             offset += 1;
1315             break;
1316
1317         case 024:
1318         case 025:
1319         case 026:
1320         case 027:
1321             if (opx->offset < 0 || opx->offset > 255)
1322                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1323                         "unsigned byte value exceeds bounds");
1324             if (opx->segment != NO_SEG) {
1325                 data = opx->offset;
1326                 out(offset, segment, &data, OUT_ADDRESS, 1,
1327                     opx->segment, opx->wrt);
1328             } else {
1329                 bytes[0] = opx->offset;
1330                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1331                     NO_SEG);
1332             }
1333             offset += 1;
1334             break;
1335
1336         case 030:
1337         case 031:
1338         case 032:
1339         case 033:
1340             warn_overflow(2, opx);
1341             data = opx->offset;
1342             out(offset, segment, &data, OUT_ADDRESS, 2,
1343                 opx->segment, opx->wrt);
1344             offset += 2;
1345             break;
1346
1347         case 034:
1348         case 035:
1349         case 036:
1350         case 037:
1351             if (opx->type & (BITS16 | BITS32))
1352                 size = (opx->type & BITS16) ? 2 : 4;
1353             else
1354                 size = (bits == 16) ? 2 : 4;
1355             warn_overflow(size, opx);
1356             data = opx->offset;
1357             out(offset, segment, &data, OUT_ADDRESS, size,
1358                 opx->segment, opx->wrt);
1359             offset += size;
1360             break;
1361
1362         case 040:
1363         case 041:
1364         case 042:
1365         case 043:
1366             warn_overflow(4, opx);
1367             data = opx->offset;
1368             out(offset, segment, &data, OUT_ADDRESS, 4,
1369                 opx->segment, opx->wrt);
1370             offset += 4;
1371             break;
1372
1373         case 044:
1374         case 045:
1375         case 046:
1376         case 047:
1377             data = opx->offset;
1378             size = ins->addr_size >> 3;
1379             warn_overflow(size, opx);
1380             out(offset, segment, &data, OUT_ADDRESS, size,
1381                 opx->segment, opx->wrt);
1382             offset += size;
1383             break;
1384
1385         case 050:
1386         case 051:
1387         case 052:
1388         case 053:
1389             if (opx->segment != segment)
1390                 errfunc(ERR_NONFATAL,
1391                         "short relative jump outside segment");
1392             data = opx->offset - insn_end;
1393             if (data > 127 || data < -128)
1394                 errfunc(ERR_NONFATAL, "short jump is out of range");
1395             bytes[0] = data;
1396             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1397             offset += 1;
1398             break;
1399
1400         case 054:
1401         case 055:
1402         case 056:
1403         case 057:
1404             data = (int64_t)opx->offset;
1405             out(offset, segment, &data, OUT_ADDRESS, 8,
1406                 opx->segment, opx->wrt);
1407             offset += 8;
1408             break;
1409
1410         case 060:
1411         case 061:
1412         case 062:
1413         case 063:
1414             if (opx->segment != segment) {
1415                 data = opx->offset;
1416                 out(offset, segment, &data,
1417                     OUT_REL2ADR, insn_end - offset,
1418                     opx->segment, opx->wrt);
1419             } else {
1420                 data = opx->offset - insn_end;
1421                 out(offset, segment, &data,
1422                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1423             }
1424             offset += 2;
1425             break;
1426
1427         case 064:
1428         case 065:
1429         case 066:
1430         case 067:
1431             if (opx->type & (BITS16 | BITS32 | BITS64))
1432                 size = (opx->type & BITS16) ? 2 : 4;
1433             else
1434                 size = (bits == 16) ? 2 : 4;
1435             if (opx->segment != segment) {
1436                 data = opx->offset;
1437                 out(offset, segment, &data,
1438                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1439                     insn_end - offset, opx->segment, opx->wrt);
1440             } else {
1441                 data = opx->offset - insn_end;
1442                 out(offset, segment, &data,
1443                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1444             }
1445             offset += size;
1446             break;
1447
1448         case 070:
1449         case 071:
1450         case 072:
1451         case 073:
1452             if (opx->segment != segment) {
1453                 data = opx->offset;
1454                 out(offset, segment, &data,
1455                     OUT_REL4ADR, insn_end - offset,
1456                     opx->segment, opx->wrt);
1457             } else {
1458                 data = opx->offset - insn_end;
1459                 out(offset, segment, &data,
1460                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1461             }
1462             offset += 4;
1463             break;
1464
1465         case 074:
1466         case 075:
1467         case 076:
1468         case 077:
1469             if (opx->segment == NO_SEG)
1470                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1471                         " relocatable");
1472             data = 0;
1473             out(offset, segment, &data, OUT_ADDRESS, 2,
1474                 outfmt->segbase(1 + opx->segment),
1475                 opx->wrt);
1476             offset += 2;
1477             break;
1478
1479         case 0140:
1480         case 0141:
1481         case 0142:
1482         case 0143:
1483             data = opx->offset;
1484             warn_overflow(2, opx);
1485             if (is_sbyte16(opx)) {
1486                 bytes[0] = data;
1487                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1488                     NO_SEG);
1489                 offset++;
1490             } else {
1491                 out(offset, segment, &data, OUT_ADDRESS, 2,
1492                     opx->segment, opx->wrt);
1493                 offset += 2;
1494             }
1495             break;
1496
1497         case 0144:
1498         case 0145:
1499         case 0146:
1500         case 0147:
1501             EMIT_REX();
1502             bytes[0] = *codes++;
1503             if (is_sbyte16(opx))
1504                 bytes[0] |= 2;  /* s-bit */
1505             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1506             offset++;
1507             break;
1508
1509         case 0150:
1510         case 0151:
1511         case 0152:
1512         case 0153:
1513             data = opx->offset;
1514             warn_overflow(4, opx);
1515             if (is_sbyte32(opx)) {
1516                 bytes[0] = data;
1517                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1518                     NO_SEG);
1519                 offset++;
1520             } else {
1521                 out(offset, segment, &data, OUT_ADDRESS, 4,
1522                     opx->segment, opx->wrt);
1523                 offset += 4;
1524             }
1525             break;
1526
1527         case 0154:
1528         case 0155:
1529         case 0156:
1530         case 0157:
1531             EMIT_REX();
1532             bytes[0] = *codes++;
1533             if (is_sbyte32(opx))
1534                 bytes[0] |= 2;  /* s-bit */
1535             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1536             offset++;
1537             break;
1538
1539         case 0160:
1540         case 0161:
1541         case 0162:
1542         case 0163:
1543         case 0164:
1544         case 0165:
1545         case 0166:
1546         case 0167:
1547             break;
1548
1549         case 0171:
1550             bytes[0] =
1551                 (ins->drexdst << 4) |
1552                 (ins->rex & REX_OC ? 0x08 : 0) |
1553                 (ins->rex & (REX_R|REX_X|REX_B));
1554             ins->rex = 0;
1555             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1556             offset++;
1557             break;
1558
1559         case 0172:
1560             c = *codes++;
1561             opx = &ins->oprs[c >> 3];
1562             bytes[0] = nasm_regvals[opx->basereg] << 4;
1563             opx = &ins->oprs[c & 7];
1564             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1565                 errfunc(ERR_NONFATAL,
1566                         "non-absolute expression not permitted as argument %d",
1567                         c & 7);
1568             } else {
1569                 if (opx->offset & ~15) {
1570                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1571                             "four-bit argument exceeds bounds");
1572                 }
1573                 bytes[0] |= opx->offset & 15;
1574             }
1575             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1576             offset++;
1577             break;
1578
1579         case 0173:
1580             c = *codes++;
1581             opx = &ins->oprs[c >> 4];
1582             bytes[0] = nasm_regvals[opx->basereg] << 4;
1583             bytes[0] |= c & 15;
1584             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1585             offset++;
1586             break;
1587
1588         case 0174:
1589             c = *codes++;
1590             opx = &ins->oprs[c];
1591             bytes[0] = nasm_regvals[opx->basereg] << 4;
1592             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1593             offset++;
1594             break;
1595
1596         case 0250:
1597         case 0251:
1598         case 0252:
1599         case 0253:
1600             data = opx->offset;
1601             warn_overflow(4, opx);
1602             if (is_sbyte64(opx)) {
1603                 bytes[0] = data;
1604                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1605                     NO_SEG);
1606                 offset++;
1607             } else {
1608                 out(offset, segment, &data, OUT_ADDRESS, 4,
1609                     opx->segment, opx->wrt);
1610                 offset += 4;
1611             }
1612             break;
1613
1614         case 0260:
1615         case 0261:
1616         case 0262:
1617         case 0263:
1618         case 0270:
1619             codes += 2;
1620             if (ins->vex_m != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1621                 bytes[0] = 0xc4;
1622                 bytes[1] = ins->vex_m | ((~ins->rex & 7) << 5);
1623                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1624                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1625                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1626                 offset += 3;
1627             } else {
1628                 bytes[0] = 0xc5;
1629                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1630                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1631                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1632                 offset += 2;
1633             }
1634             break;
1635
1636         case 0274:
1637         case 0275:
1638         case 0276:
1639         case 0277:
1640         {
1641             uint64_t uv, um;
1642             int s;
1643
1644             if (ins->rex & REX_W)
1645                 s = 64;
1646             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1647                 s = 16;
1648             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1649                 s = 32;
1650             else
1651                 s = bits;
1652
1653             um = (uint64_t)2 << (s-1);
1654             uv = opx->offset;
1655
1656             if (uv > 127 && uv < (uint64_t)-128 &&
1657                 (uv < um-128 || uv > um-1)) {
1658                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1659                         "signed byte value exceeds bounds");
1660             }
1661             if (opx->segment != NO_SEG) {
1662                 data = um;
1663                 out(offset, segment, &data, OUT_ADDRESS, 1,
1664                     opx->segment, opx->wrt);
1665             } else {
1666                 bytes[0] = um;
1667                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1668                     NO_SEG);
1669             }
1670             offset += 1;
1671             break;
1672         }
1673
1674         case 0300:
1675         case 0301:
1676         case 0302:
1677         case 0303:
1678             break;
1679
1680         case 0310:
1681             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1682                 *bytes = 0x67;
1683                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1684                 offset += 1;
1685             } else
1686                 offset += 0;
1687             break;
1688
1689         case 0311:
1690             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1691                 *bytes = 0x67;
1692                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1693                 offset += 1;
1694             } else
1695                 offset += 0;
1696             break;
1697
1698         case 0312:
1699             break;
1700
1701         case 0313:
1702             ins->rex = 0;
1703             break;
1704
1705         case 0314:
1706         case 0315:
1707         case 0316:
1708         case 0317:
1709             break;
1710
1711         case 0320:
1712             if (bits != 16) {
1713                 *bytes = 0x66;
1714                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1715                 offset += 1;
1716             } else
1717                 offset += 0;
1718             break;
1719
1720         case 0321:
1721             if (bits == 16) {
1722                 *bytes = 0x66;
1723                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1724                 offset += 1;
1725             } else
1726                 offset += 0;
1727             break;
1728
1729         case 0322:
1730         case 0323:
1731             break;
1732
1733         case 0324:
1734             ins->rex |= REX_W;
1735             break;
1736
1737         case 0330:
1738             *bytes = *codes++ ^ condval[ins->condition];
1739             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1740             offset += 1;
1741             break;
1742
1743         case 0331:
1744             break;
1745
1746         case 0332:
1747         case 0333:
1748             *bytes = c - 0332 + 0xF2;
1749             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1750             offset += 1;
1751             break;
1752
1753         case 0334:
1754             if (ins->rex & REX_R) {
1755                 *bytes = 0xF0;
1756                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1757                 offset += 1;
1758             }
1759             ins->rex &= ~(REX_L|REX_R);
1760             break;
1761
1762         case 0335:
1763             break;
1764
1765         case 0336:
1766         case 0337:
1767             break;
1768
1769         case 0340:
1770             if (ins->oprs[0].segment != NO_SEG)
1771                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1772             else {
1773                 int64_t size = ins->oprs[0].offset;
1774                 if (size > 0)
1775                     out(offset, segment, NULL,
1776                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1777                 offset += size;
1778             }
1779             break;
1780
1781         case 0360:
1782             break;
1783
1784         case 0361:
1785             bytes[0] = 0x66;
1786             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1787             offset += 1;
1788             break;
1789
1790         case 0362:
1791         case 0363:
1792             bytes[0] = c - 0362 + 0xf2;
1793             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1794             offset += 1;
1795             break;
1796
1797         case 0364:
1798         case 0365:
1799             break;
1800
1801         case 0366:
1802         case 0367:
1803             *bytes = c - 0366 + 0x66;
1804             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1805             offset += 1;
1806             break;
1807
1808         case 0370:
1809         case 0371:
1810         case 0372:
1811             break;
1812
1813         case 0373:
1814             *bytes = bits == 16 ? 3 : 5;
1815             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1816             offset += 1;
1817             break;
1818
1819         default:               /* can't do it by 'case' statements */
1820             if (c >= 0100 && c <= 0277) {       /* it's an EA */
1821                 ea ea_data;
1822                 int rfield;
1823                 int32_t rflags;
1824                 uint8_t *p;
1825                 int32_t s;
1826                 enum out_type type;
1827
1828                 if (c <= 0177) {
1829                     /* pick rfield from operand b */
1830                     rflags = regflag(&ins->oprs[c & 7]);
1831                     rfield = nasm_regvals[ins->oprs[c & 7].basereg];
1832                 } else {
1833                     /* rfield is constant */
1834                     rflags = 0;
1835                     rfield = c & 7;
1836                 }
1837
1838                 if (!process_ea
1839                     (&ins->oprs[(c >> 3) & 7], &ea_data, bits,
1840                      ins->addr_size, rfield, rflags)) {
1841                     errfunc(ERR_NONFATAL, "invalid effective address");
1842                 }
1843
1844
1845                 p = bytes;
1846                 *p++ = ea_data.modrm;
1847                 if (ea_data.sib_present)
1848                     *p++ = ea_data.sib;
1849
1850                 /* DREX suffixes come between the SIB and the displacement */
1851                 if (ins->rex & REX_D) {
1852                     *p++ =
1853                         (ins->drexdst << 4) |
1854                         (ins->rex & REX_OC ? 0x08 : 0) |
1855                         (ins->rex & (REX_R|REX_X|REX_B));
1856                     ins->rex = 0;
1857                 }
1858
1859                 s = p - bytes;
1860                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1861
1862                 /*
1863                  * Make sure the address gets the right offset in case
1864                  * the line breaks in the .lst file (BR 1197827)
1865                  */
1866                 offset += s;
1867                 s = 0;
1868
1869                 switch (ea_data.bytes) {
1870                 case 0:
1871                     break;
1872                 case 1:
1873                     if (ins->oprs[(c >> 3) & 7].segment != NO_SEG) {
1874                         data = ins->oprs[(c >> 3) & 7].offset;
1875                         out(offset, segment, &data, OUT_ADDRESS, 1,
1876                             ins->oprs[(c >> 3) & 7].segment,
1877                             ins->oprs[(c >> 3) & 7].wrt);
1878                     } else {
1879                         *bytes = ins->oprs[(c >> 3) & 7].offset;
1880                         out(offset, segment, bytes, OUT_RAWDATA, 1,
1881                             NO_SEG, NO_SEG);
1882                     }
1883                     s++;
1884                     break;
1885                 case 8:
1886                 case 2:
1887                 case 4:
1888                     data = ins->oprs[(c >> 3) & 7].offset;
1889                     warn_overflow(ea_data.bytes, opx);
1890                     s += ea_data.bytes;
1891                     if (ea_data.rip) {
1892                         data -= insn_end - (offset+ea_data.bytes);
1893                         type = OUT_REL4ADR;
1894                     } else {
1895                         type = OUT_ADDRESS;
1896                     }
1897                     out(offset, segment, &data, type, ea_data.bytes,
1898                         ins->oprs[(c >> 3) & 7].segment,
1899                         ins->oprs[(c >> 3) & 7].wrt);
1900                     break;
1901                 }
1902                 offset += s;
1903             } else {
1904                 errfunc(ERR_PANIC, "internal instruction table corrupt"
1905                         ": instruction code 0x%02X given", c);
1906             }
1907         }
1908     }
1909 }
1910
1911 static int32_t regflag(const operand * o)
1912 {
1913     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1914         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1915     }
1916     return nasm_reg_flags[o->basereg];
1917 }
1918
1919 static int32_t regval(const operand * o)
1920 {
1921     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1922         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1923     }
1924     return nasm_regvals[o->basereg];
1925 }
1926
1927 static int op_rexflags(const operand * o, int mask)
1928 {
1929     int32_t flags;
1930     int val;
1931
1932     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1933         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1934     }
1935
1936     flags = nasm_reg_flags[o->basereg];
1937     val = nasm_regvals[o->basereg];
1938
1939     return rexflags(val, flags, mask);
1940 }
1941
1942 static int rexflags(int val, int32_t flags, int mask)
1943 {
1944     int rex = 0;
1945
1946     if (val >= 8)
1947         rex |= REX_B|REX_X|REX_R;
1948     if (flags & BITS64)
1949         rex |= REX_W;
1950     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1951         rex |= REX_H;
1952     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1953         rex |= REX_P;
1954
1955     return rex & mask;
1956 }
1957
1958 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1959 {
1960     int i, size[MAX_OPERANDS], asize, oprs, ret;
1961
1962     ret = 100;
1963
1964     /*
1965      * Check the opcode
1966      */
1967     if (itemp->opcode != instruction->opcode)
1968         return 0;
1969
1970     /*
1971      * Count the operands
1972      */
1973     if (itemp->operands != instruction->operands)
1974         return 0;
1975
1976     /*
1977      * Check that no spurious colons or TOs are present
1978      */
1979     for (i = 0; i < itemp->operands; i++)
1980         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1981             return 0;
1982
1983     /*
1984      * Process size flags
1985      */
1986     if (itemp->flags & IF_ARMASK) {
1987         memset(size, 0, sizeof size);
1988
1989         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
1990
1991         switch (itemp->flags & IF_SMASK) {
1992         case IF_SB:
1993             size[i] = BITS8;
1994             break;
1995         case IF_SW:
1996             size[i] = BITS16;
1997             break;
1998         case IF_SD:
1999             size[i] = BITS32;
2000             break;
2001         case IF_SQ:
2002             size[i] = BITS64;
2003             break;
2004         case IF_SO:
2005             size[i] = BITS128;
2006             break;
2007         case IF_SY:
2008             size[i] = BITS256;
2009             break;
2010         case IF_SZ:
2011             switch (bits) {
2012             case 16:
2013                 size[i] = BITS16;
2014                 break;
2015             case 32:
2016                 size[i] = BITS32;
2017                 break;
2018             case 64:
2019                 size[i] = BITS64;
2020                 break;
2021             }
2022             break;
2023         default:
2024             break;
2025         }
2026     } else {
2027         asize = 0;
2028         switch (itemp->flags & IF_SMASK) {
2029         case IF_SB:
2030             asize = BITS8;
2031             break;
2032         case IF_SW:
2033             asize = BITS16;
2034             break;
2035         case IF_SD:
2036             asize = BITS32;
2037             break;
2038         case IF_SQ:
2039             asize = BITS64;
2040             break;
2041         case IF_SO:
2042             asize = BITS128;
2043             break;
2044         case IF_SY:
2045             asize = BITS256;
2046             break;
2047         case IF_SZ:
2048             switch (bits) {
2049             case 16:
2050                 asize = BITS16;
2051                 break;
2052             case 32:
2053                 asize = BITS32;
2054                 break;
2055             case 64:
2056                 asize = BITS64;
2057                 break;
2058             }
2059             break;
2060         default:
2061             break;
2062         }
2063         for (i = 0; i < MAX_OPERANDS; i++)
2064             size[i] = asize;
2065     }
2066
2067     /*
2068      * Check that the operand flags all match up
2069      */
2070     for (i = 0; i < itemp->operands; i++) {
2071         int32_t type = instruction->oprs[i].type;
2072         if (!(type & SIZE_MASK))
2073             type |= size[i];
2074
2075         if (itemp->opd[i] & SAME_AS) {
2076             int j = itemp->opd[i] & ~SAME_AS;
2077             if (type != instruction->oprs[j].type ||
2078                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2079                 return 0;
2080         } else if (itemp->opd[i] & ~type ||
2081             ((itemp->opd[i] & SIZE_MASK) &&
2082              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2083             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2084                 (type & SIZE_MASK))
2085                 return 0;
2086             else
2087                 return 1;
2088         }
2089     }
2090
2091     /*
2092      * Check operand sizes
2093      */
2094     if (itemp->flags & (IF_SM | IF_SM2)) {
2095         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2096         asize = 0;
2097         for (i = 0; i < oprs; i++) {
2098             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2099                 int j;
2100                 for (j = 0; j < oprs; j++)
2101                     size[j] = asize;
2102                 break;
2103             }
2104         }
2105     } else {
2106         oprs = itemp->operands;
2107     }
2108
2109     for (i = 0; i < itemp->operands; i++) {
2110         if (!(itemp->opd[i] & SIZE_MASK) &&
2111             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2112             return 2;
2113     }
2114
2115     /*
2116      * Check template is okay at the set cpu level
2117      */
2118     if (((itemp->flags & IF_PLEVEL) > cpu))
2119         return 3;
2120
2121     /*
2122      * Check if instruction is available in long mode
2123      */
2124     if ((itemp->flags & IF_NOLONG) && (bits == 64))
2125         return 4;
2126
2127     /*
2128      * Check if special handling needed for Jumps
2129      */
2130     if ((uint8_t)(itemp->code[0]) >= 0370)
2131         return 99;
2132
2133     return ret;
2134 }
2135
/*
 * process_ea: work out the ModRM/SIB encoding of one operand.
 *
 *   input    - the operand to encode: a register, or a memory reference
 *              described by basereg/indexreg/scale/offset/segment plus
 *              hints and eaflags
 *   output   - filled in on success: modrm, sib_present, sib (only when
 *              sib_present is true), bytes (displacement length), rip and
 *              size.  REX bits are OR'ed into output->rex.
 *   bits     - assembly mode; only consulted for the pure-offset case
 *   addrbits - address size, checked against the registers used
 *   rfield   - value placed in bits 3..5 of the ModRM byte
 *   rflags   - register flags belonging to rfield, used for its REX bits
 *
 * Returns `output` on success, or NULL when the operand cannot be encoded.
 * On a NULL return output->rip and output->rex may already have been
 * modified.
 */
2136 static ea *process_ea(operand * input, ea * output, int bits,
2137                       int addrbits, int rfield, int32_t rflags)
2138 {
         /* A forward reference forces a full-size displacement below. */
2139     bool forw_ref = !!(input->opflags & OPFLAG_FORWARD);
2140
2141     output->rip = false;
2142
         /*
          * Note: the REX bits are OR'ed into whatever output->rex already
          * holds; nothing in this function clears it first.
          */
2143     /* REX flags for the rfield operand */
2144     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2145
2146     if (!(REGISTER & ~input->type)) {   /* register direct */
2147         int i;
2148         int32_t f;
2149
2150         if (input->basereg < EXPR_REG_START /* Verify as Register */
2151             || input->basereg >= REG_ENUM_LIMIT)
2152             return NULL;
2153         f = regflag(input);
2154         i = nasm_regvals[input->basereg];
2155
2156         if (REG_EA & ~f)
2157             return NULL;        /* Invalid EA register */
2158
2159         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2160
2161         output->sib_present = false;             /* no SIB necessary */
2162         output->bytes = 0;  /* no offset necessary either */
             /* mod = 3 (0xC0): the r/m field names a register */
2163         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2164     } else {                    /* it's a memory reference */
2165         if (input->basereg == -1
2166             && (input->indexreg == -1 || input->scale == 0)) {
2167             /* it's a pure offset */
                 /*
                  * In 64-bit mode without IP_REL the offset is encoded via
                  * a SIB byte (index = 4, base = 5) with a 4-byte
                  * displacement; otherwise mod = 0 with rm = 5 (or rm = 6
                  * for 16-bit addressing) is used, and the result is
                  * flagged RIP-relative when bits == 64.
                  */
2168             if (bits == 64 && (~input->type & IP_REL)) {
2169               int scale, index, base;
2170               output->sib_present = true;
2171               scale = 0;
2172               index = 4;
2173               base = 5;
2174               output->sib = (scale << 6) | (index << 3) | base;
2175               output->bytes = 4;
2176               output->modrm = 4 | ((rfield & 7) << 3);
2177               output->rip = false;
2178             } else {
2179               output->sib_present = false;
2180               output->bytes = (addrbits != 16 ? 4 : 2);
2181               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2182               output->rip = bits == 64;
2183             }
2184         } else {                /* it's an indirection */
                 /*
                  * i/b are the index/base register identifiers, it/bt their
                  * numeric values (-1 when absent) and ix/bx their flags.
                  * NOTE(review): the offset is narrowed to int32_t here;
                  * presumably the operand stores a wider value - confirm
                  * that dropping the high bits is intended.
                  */
2185             int i = input->indexreg, b = input->basereg, s = input->scale;
2186             int32_t o = input->offset, seg = input->segment;
2187             int hb = input->hintbase, ht = input->hinttype;
2188             int t;
2189             int it, bt;
2190             int32_t ix, bx;     /* register flags */
2191
2192             if (s == 0)
2193                 i = -1;         /* make this easy, at least */
2194
2195             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2196                 it = nasm_regvals[i];
2197                 ix = nasm_reg_flags[i];
2198             } else {
2199                 it = -1;
2200                 ix = 0;
2201             }
2202
2203             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2204                 bt = nasm_regvals[b];
2205                 bx = nasm_reg_flags[b];
2206             } else {
2207                 bt = -1;
2208                 bx = 0;
2209             }
2210
2211             /* check for a 32/64-bit memory reference... */
2212             if ((ix|bx) & (BITS32|BITS64)) {
2213                 /* it must be a 32/64-bit memory reference. Firstly we have
2214                  * to check that all registers involved are type E/Rxx. */
                     /* sok = set of address sizes still acceptable */
2215                 int32_t sok = BITS32|BITS64;
2216
2217                 if (it != -1) {
2218                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2219                         sok &= ix;
2220                     else
2221                         return NULL;
2222                 }
2223
2224                 if (bt != -1) {
2225                     if (REG_GPR & ~bx)
2226                         return NULL; /* Invalid register */
2227                     if (~sok & bx & SIZE_MASK)
2228                         return NULL; /* Invalid size */
2229                     sok &= bx;
2230                 }
2231
2232                 /* While we're here, ensure the user didn't specify
2233                    WORD or QWORD. */
2234                 if (input->disp_size == 16 || input->disp_size == 64)
2235                     return NULL;
2236
                     /* the register size must agree with the address size */
2237                 if (addrbits == 16 ||
2238                     (addrbits == 32 && !(sok & BITS32)) ||
2239                     (addrbits == 64 && !(sok & BITS64)))
2240                     return NULL;
2241
2242                 /* now reorganize base/index */
2243                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2244                     ((hb == b && ht == EAH_NOTBASE)
2245                      || (hb == i && ht == EAH_MAKEBASE))) {
2246                     /* swap if hints say so */
2247                     t = bt, bt = it, it = t;
2248                     t = bx, bx = ix, ix = t;
2249                 }
2250                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2251                     bt = -1, bx = 0, s++;
2252                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2253                     /* make single reg base, unless hint */
2254                     bt = it, bx = ix, it = -1, ix = 0;
2255                 }
2256                 if (((s == 2 && it != REG_NUM_ESP
2257                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2258                      || s == 5 || s == 9) && bt == -1)
2259                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2260                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2261                     && (input->eaflags & EAF_TIMESTWO))
2262                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2263                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2264                 if (s == 1 && it == REG_NUM_ESP) {
2265                     /* swap ESP into base if scale is 1 */
2266                     t = it, it = bt, bt = t;
2267                     t = ix, ix = bx, bx = t;
2268                 }
2269                 if (it == REG_NUM_ESP
2270                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2271                     return NULL;        /* wrong, for various reasons */
2272
2273                 output->rex |= rexflags(it, ix, REX_X);
2274                 output->rex |= rexflags(bt, bx, REX_B);
2275
2276                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2277                     /* no SIB needed */
2278                     int mod, rm;
2279
                         /*
                          * mod 0: no displacement (rm = 5 with no base takes
                          * a 4-byte displacement instead), mod 1: 1-byte
                          * displacement, mod 2: 4-byte displacement.
                          */
2280                     if (bt == -1) {
2281                         rm = 5;
2282                         mod = 0;
2283                     } else {
2284                         rm = (bt & 7);
2285                         if (rm != REG_NUM_EBP && o == 0 &&
2286                                 seg == NO_SEG && !forw_ref &&
2287                                 !(input->eaflags &
2288                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2289                             mod = 0;
2290                         else if (input->eaflags & EAF_BYTEOFFS ||
2291                                  (o >= -128 && o <= 127 && seg == NO_SEG
2292                                   && !forw_ref
2293                                   && !(input->eaflags & EAF_WORDOFFS)))
2294                             mod = 1;
2295                         else
2296                             mod = 2;
2297                     }
2298
2299                     output->sib_present = false;
2300                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2301                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2302                 } else {
2303                     /* we need a SIB */
2304                     int mod, scale, index, base;
2305
                         /* index = 4 encodes "no index register" */
2306                     if (it == -1)
2307                         index = 4, s = 1;
2308                     else
2309                         index = (it & 7);
2310
                         /* translate the multiplier into the 2-bit scale field */
2311                     switch (s) {
2312                     case 1:
2313                         scale = 0;
2314                         break;
2315                     case 2:
2316                         scale = 1;
2317                         break;
2318                     case 4:
2319                         scale = 2;
2320                         break;
2321                     case 8:
2322                         scale = 3;
2323                         break;
2324                     default:   /* then what the smeg is it? */
2325                         return NULL;    /* panic */
2326                     }
2327
                         /* same mod selection as in the no-SIB case above */
2328                     if (bt == -1) {
2329                         base = 5;
2330                         mod = 0;
2331                     } else {
2332                         base = (bt & 7);
2333                         if (base != REG_NUM_EBP && o == 0 &&
2334                                     seg == NO_SEG && !forw_ref &&
2335                                     !(input->eaflags &
2336                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2337                             mod = 0;
2338                         else if (input->eaflags & EAF_BYTEOFFS ||
2339                                  (o >= -128 && o <= 127 && seg == NO_SEG
2340                                   && !forw_ref
2341                                   && !(input->eaflags & EAF_WORDOFFS)))
2342                             mod = 1;
2343                         else
2344                             mod = 2;
2345                     }
2346
2347                     output->sib_present = true;
2348                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
                         /* rm = 4 in the ModRM byte announces the SIB byte */
2349                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2350                     output->sib = (scale << 6) | (index << 3) | base;
2351                 }
2352             } else {            /* it's 16-bit */
2353                 int mod, rm;
2354
2355                 /* check for 64-bit long mode */
2356                 if (addrbits == 64)
2357                     return NULL;
2358
2359                 /* check all registers are BX, BP, SI or DI */
2360                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2361                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2362                                        && i != R_SI && i != R_DI))
2363                     return NULL;
2364
2365                 /* ensure the user didn't specify DWORD/QWORD */
2366                 if (input->disp_size == 32 || input->disp_size == 64)
2367                     return NULL;
2368
2369                 if (s != 1 && i != -1)
2370                     return NULL;        /* no can do, in 16-bit EA */
2371                 if (b == -1 && i != -1) {
2372                     int tmp = b;
2373                     b = i;
2374                     i = tmp;
2375                 }               /* swap */
2376                 if ((b == R_SI || b == R_DI) && i != -1) {
2377                     int tmp = b;
2378                     b = i;
2379                     i = tmp;
2380                 }
2381                 /* have BX/BP as base, SI/DI index */
2382                 if (b == i)
2383                     return NULL;        /* shouldn't ever happen, in theory */
2384                 if (i != -1 && b != -1 &&
2385                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2386                     return NULL;        /* invalid combinations */
2387                 if (b == -1)    /* pure offset: handled above */
2388                     return NULL;        /* so if it gets to here, panic! */
2389
                     /*
                      * Map the register combination to the r/m code:
                      * 0..3 for index+base pairs, 4..7 for a lone register.
                      */
2390                 rm = -1;
2391                 if (i != -1)
2392                     switch (i * 256 + b) {
2393                     case R_SI * 256 + R_BX:
2394                         rm = 0;
2395                         break;
2396                     case R_DI * 256 + R_BX:
2397                         rm = 1;
2398                         break;
2399                     case R_SI * 256 + R_BP:
2400                         rm = 2;
2401                         break;
2402                     case R_DI * 256 + R_BP:
2403                         rm = 3;
2404                         break;
2405                 } else
2406                     switch (b) {
2407                     case R_SI:
2408                         rm = 4;
2409                         break;
2410                     case R_DI:
2411                         rm = 5;
2412                         break;
2413                     case R_BP:
2414                         rm = 6;
2415                         break;
2416                     case R_BX:
2417                         rm = 7;
2418                         break;
2419                     }
2420                 if (rm == -1)   /* can't happen, in theory */
2421                     return NULL;        /* so panic if it does */
2422
                     /*
                      * rm == 6 (lone BP) never uses mod 0, because mod 0 with
                      * rm 6 is the pure-offset encoding emitted above.
                      */
2423                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2424                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2425                     mod = 0;
2426                 else if (input->eaflags & EAF_BYTEOFFS ||
2427                          (o >= -128 && o <= 127 && seg == NO_SEG
2428                           && !forw_ref
2429                           && !(input->eaflags & EAF_WORDOFFS)))
2430                     mod = 1;
2431                 else
2432                     mod = 2;
2433
2434                 output->sib_present = false;    /* no SIB - it's 16-bit */
2435                 output->bytes = mod;    /* bytes of offset needed */
2436                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2437             }
2438         }
2439     }
2440
         /* total length: ModRM byte + optional SIB byte + displacement */
2441     output->size = 1 + output->sib_present + output->bytes;
2442     return output;
2443 }
2444
2445 static void add_asp(insn *ins, int addrbits)
2446 {
2447     int j, valid;
2448     int defdisp;
2449
2450     valid = (addrbits == 64) ? 64|32 : 32|16;
2451
2452     switch (ins->prefixes[PPS_ASIZE]) {
2453     case P_A16:
2454         valid &= 16;
2455         break;
2456     case P_A32:
2457         valid &= 32;
2458         break;
2459     case P_A64:
2460         valid &= 64;
2461         break;
2462     case P_ASP:
2463         valid &= (addrbits == 32) ? 16 : 32;
2464         break;
2465     default:
2466         break;
2467     }
2468
2469     for (j = 0; j < ins->operands; j++) {
2470         if (!(MEMORY & ~ins->oprs[j].type)) {
2471             int32_t i, b;
2472
2473             /* Verify as Register */
2474             if (ins->oprs[j].indexreg < EXPR_REG_START
2475                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2476                 i = 0;
2477             else
2478                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2479
2480             /* Verify as Register */
2481             if (ins->oprs[j].basereg < EXPR_REG_START
2482                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2483                 b = 0;
2484             else
2485                 b = nasm_reg_flags[ins->oprs[j].basereg];
2486
2487             if (ins->oprs[j].scale == 0)
2488                 i = 0;
2489
2490             if (!i && !b) {
2491                 int ds = ins->oprs[j].disp_size;
2492                 if ((addrbits != 64 && ds > 8) ||
2493                     (addrbits == 64 && ds == 16))
2494                     valid &= ds;
2495             } else {
2496                 if (!(REG16 & ~b))
2497                     valid &= 16;
2498                 if (!(REG32 & ~b))
2499                     valid &= 32;
2500                 if (!(REG64 & ~b))
2501                     valid &= 64;
2502
2503                 if (!(REG16 & ~i))
2504                     valid &= 16;
2505                 if (!(REG32 & ~i))
2506                     valid &= 32;
2507                 if (!(REG64 & ~i))
2508                     valid &= 64;
2509             }
2510         }
2511     }
2512
2513     if (valid & addrbits) {
2514         ins->addr_size = addrbits;
2515     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2516         /* Add an address size prefix */
2517         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2518         ins->prefixes[PPS_ASIZE] = pref;
2519         ins->addr_size = (addrbits == 32) ? 16 : 32;
2520     } else {
2521         /* Impossible... */
2522         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2523         ins->addr_size = addrbits; /* Error recovery */
2524     }
2525
2526     defdisp = ins->addr_size == 16 ? 16 : 32;
2527
2528     for (j = 0; j < ins->operands; j++) {
2529         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2530             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2531             != ins->addr_size) {
2532             /* mem_offs sizes must match the address size; if not,
2533                strip the MEM_OFFS bit and match only EA instructions */
2534             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2535         }
2536     }
2537 }