assemble.c

   1 /* assemble.c   code generation for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the license given in the file "LICENSE"
   6  * distributed in the NASM archive.
   7  *
   8  * the actual codes (C syntax, i.e. octal):
   9  * \0            - terminates the code. (Unless it's a literal of course.)
  10  * \1..\4        - that many literal bytes follow in the code stream
  11  * \5            - add 4 to the primary operand number (b, low octdigit)
  12  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  13  * \7            - add 4 to both the primary and the secondary operand number
  14  * \10..\13      - a literal byte follows in the code stream, to be added
  15  *                 to the register value of operand 0..3
  16  * \14..\17      - a signed byte immediate operand, from operand 0..3
  17  * \20..\23      - a byte immediate operand, from operand 0..3
  18  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  19  * \30..\33      - a word immediate operand, from operand 0..3
  20  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  21  *                 assembly mode or the operand-size override on the operand
  22  * \40..\43      - a long immediate operand, from operand 0..3
  23  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  24  *                 depending on the address size of the instruction.
  25  * \50..\53      - a byte relative operand, from operand 0..3
  26  * \54..\57      - a qword immediate operand, from operand 0..3
  27  * \60..\63      - a word relative operand, from operand 0..3
  28  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  29  *                 assembly mode or the operand-size override on the operand
  30  * \70..\73      - a long relative operand, from operand 0..3
  31  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  32  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  33  *                 field the register value of operand b.
  34  * \140..\143    - an immediate word or signed byte for operand 0..3
  35  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  36  *                  is a signed byte rather than a word.  Opcode byte follows.
  37  * \150..\153    - an immediate dword or signed byte for operand 0..3
  38  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  39  *                  is a signed byte rather than a dword.  Opcode byte follows.
  40  * \160..\163    - this instruction uses DREX rather than REX, with the
  41  *                 OC0 field set to 0, and the dest field taken from
  42  *                 operand 0..3.
  43  * \164..\167    - this instruction uses DREX rather than REX, with the
  44  *                 OC0 field set to 1, and the dest field taken from
  45  *                 operand 0..3.
  46  * \171          - placement of DREX suffix in the absence of an EA
  47  * \172\ab       - the register number from operand a in bits 7..4, with
  48  *                 the 4-bit immediate from operand b in bits 3..0.
  49  * \173\xab      - the register number from operand a in bits 7..4, with
  50  *                 the value b in bits 3..0.
  51  * \174\a        - the register number from operand a in bits 7..4, and
  52  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  53  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  54  *                 field equal to digit b.
  55  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  56  *                 is not equal to the truncated and sign-extended 32-bit
  57  *                 operand; used for 32-bit immediates in 64-bit mode.
  58  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  59  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  60  *                 V field taken from operand 0..3.
  61  * \270          - this instruction uses VEX/XOP rather than REX, with the
  62  *                 V field set to 1111b.
  63  *
  64  * VEX/XOP prefixes are followed by the sequence:
  65  * \tmm\wlp        where mm is the M field; and wlp is:
  66  *                 00 0ww lpp
  67  *                 [w0] ww = 0 for W = 0
  68  *                 [w1] ww = 1 for W = 1
  69  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  70  *                 [ww] ww = 3 for W used as REX.W
  71  *
  72  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
  73  *
  74  * \274..\277    - a signed byte immediate operand, from operand 0..3,
  75  *                 which is to be extended to the operand size.
  76  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  77  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  78  * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
  79  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  80  * \314          - (disassembler only) invalid with REX.B
  81  * \315          - (disassembler only) invalid with REX.X
  82  * \316          - (disassembler only) invalid with REX.R
  83  * \317          - (disassembler only) invalid with REX.W
  84  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
  85  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
  86  * \322          - indicates that this instruction is only valid when the
  87  *                 operand size is the default (instruction to disassembler,
  88  *                 generates no code in the assembler)
  89  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
  90  * \324          - indicates 64-bit operand size requiring REX prefix.
  91  * \325          - instruction which always uses spl/bpl/sil/dil
  92  * \330          - a literal byte follows in the code stream, to be added
  93  *                 to the condition code value of the instruction.
  94  * \331          - instruction not valid with REP prefix.  Hint for
  95  *                 disassembler only; for SSE instructions.
  96  * \332          - REP prefix (0xF2 byte) used as opcode extension.
  97  * \333          - REP prefix (0xF3 byte) used as opcode extension.
  98  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
  99  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  100  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  101  * \337          - force a REPNE prefix (0xF2) even if not specified.
 102  *                 \336-\337 are still listed as prefixes in the disassembler.
 103  * \340          - reserve <operand 0> bytes of uninitialized storage.
 104  *                 Operand 0 had better be a segmentless constant.
 105  * \341          - this instruction needs a WAIT "prefix"
 106  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 107  *                 (POP is never used for CS) depending on operand 0
 108  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 109  *                 on operand 0
 110  * \360          - no SSE prefix (== \364\331)
 111  * \361          - 66 SSE prefix (== \366\331)
 112  * \362          - F2 SSE prefix (== \364\332)
 113  * \363          - F3 SSE prefix (== \364\333)
 114  * \364          - operand-size prefix (0x66) not permitted
 115  * \365          - address-size prefix (0x67) not permitted
 116  * \366          - operand-size prefix (0x66) used as opcode extension
 117  * \367          - address-size prefix (0x67) used as opcode extension
 118  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 119  *                 370 is used for Jcc, 371 is used for JMP.
 120  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 121  *                 used for conditional jump over longer jump
 122  */
 123
 124 #include "compiler.h"
 125
 126 #include <stdio.h>
 127 #include <string.h>
 128 #include <inttypes.h>
 129
 130 #include "nasm.h"
 131 #include "nasmlib.h"
 132 #include "assemble.h"
 133 #include "insns.h"
 134 #include "tables.h"
 135
 136 typedef struct {
 137     int sib_present;                 /* is a SIB byte necessary? */
 138     int bytes;                       /* # of bytes of offset needed */
 139     int size;                        /* lazy - this is sib+bytes+1 */
 140     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 141 } ea;
 142
/*
 * File-scope state shared by the helpers in this file.  assemble() assigns
 * all four on entry; insn_size() assigns only cpu and errfunc.
 */
static uint32_t cpu;            /* cpu level received from nasm.c */
static efunc errfunc;           /* error-reporting callback passed in by the caller */
static struct ofmt *outfmt;     /* output format driver passed to assemble() */
static ListGen *list;           /* listing generator passed to assemble() */
 147
/*
 * Forward declarations of file-local helpers that are used before their
 * definitions appear.
 */

/* Size in bytes of the instruction body for a given template code string;
 * callers treat a negative result as an error. */
static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
/* Emit the bytes for `ins` according to template `temp`. */
static void gencode(int32_t segment, int64_t offset, int bits,
                    insn * ins, const struct itemplate *temp,
                    int64_t insn_end);
/* Template match score: callers treat 100 as a match, 99 as a match that
 * additionally needs jmp_match(), and smaller positive values as the
 * reason for a mismatch. */
static int matches(const struct itemplate *, insn *, int bits);
static int32_t regflag(const operand *);
static int32_t regval(const operand *);
static int rexflags(int, int32_t, int);
static int op_rexflags(const operand *, int);
static ea *process_ea(operand *, ea *, int, int, int, int32_t);
/* Decide whether the instruction needs an address-size prefix. */
static void add_asp(insn *, int);
 159
 160 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 161 {
 162     return ins->prefixes[pos] == prefix;
 163 }
 164
 165 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 166 {
 167     if (ins->prefixes[pos])
 168         errfunc(ERR_NONFATAL, "invalid %s prefix",
 169                 prefix_name(ins->prefixes[pos]));
 170 }
 171
 172 static const char *size_name(int size)
 173 {
 174     switch (size) {
 175     case 1:
 176         return "byte";
 177     case 2:
 178         return "word";
 179     case 4:
 180         return "dword";
 181     case 8:
 182         return "qword";
 183     case 10:
 184         return "tword";
 185     case 16:
 186         return "oword";
 187     case 32:
 188         return "yword";
 189     default:
 190         return "???";
 191     }
 192 }
 193
 194 static void warn_overflow(int size, const struct operand *o)
 195 {
 196     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 197         int64_t lim = ((int64_t)1 << (size*8))-1;
 198         int64_t data = o->offset;
 199
 200         if (data < ~lim || data > lim)
 201             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 202                     "%s data exceeds bounds", size_name(size));
 203     }
 204 }
 205 /*
 206  * This routine wrappers the real output format's output routine,
 207  * in order to pass a copy of the data off to the listing file
 208  * generator at the same time.
 209  */
 210 static void out(int64_t offset, int32_t segto, const void *data,
 211                 enum out_type type, uint64_t size,
 212                 int32_t segment, int32_t wrt)
 213 {
 214     static int32_t lineno = 0;     /* static!!! */
 215     static char *lnfname = NULL;
 216     uint8_t p[8];
 217
 218     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 219         /*
 220          * This is a non-relocated address, and we're going to
 221          * convert it into RAWDATA format.
 222          */
 223         uint8_t *q = p;
 224
 225         if (size > 8) {
 226             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 227             return;
 228         }
 229
 230         WRITEADDR(q, *(int64_t *)data, size);
 231         data = p;
 232         type = OUT_RAWDATA;
 233     }
 234
 235     list->output(offset, data, type, size);
 236
 237     /*
 238      * this call to src_get determines when we call the
 239      * debug-format-specific "linenum" function
 240      * it updates lineno and lnfname to the current values
 241      * returning 0 if "same as last time", -2 if lnfname
 242      * changed, and the amount by which lineno changed,
 243      * if it did. thus, these variables must be static
 244      */
 245
 246     if (src_get(&lineno, &lnfname)) {
 247         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 248     }
 249
 250     outfmt->output(segto, data, type, size, segment, wrt);
 251 }
 252
 253 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 254                      insn * ins, const uint8_t *code)
 255 {
 256     int64_t isize;
 257     uint8_t c = code[0];
 258
 259     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 260         return false;
 261     if (!optimizing)
 262         return false;
 263     if (optimizing < 0 && c == 0371)
 264         return false;
 265
 266     isize = calcsize(segment, offset, bits, ins, code);
 267
 268     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 269         /* Be optimistic in pass 1 */
 270         return true;
 271
 272     if (ins->oprs[0].segment != segment)
 273         return false;
 274
 275     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 276     return (isize >= -128 && isize <= 127); /* is it byte size? */
 277 }
 278
 279 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 280               insn * instruction, struct ofmt *output, efunc error,
 281               ListGen * listgen)
 282 {
 283     const struct itemplate *temp;
 284     int j;
 285     int size_prob;
 286     int64_t insn_end;
 287     int32_t itimes;
 288     int64_t start = offset;
 289     int64_t wsize = 0;             /* size for DB etc. */
 290
 291     errfunc = error;            /* to pass to other functions */
 292     cpu = cp;
 293     outfmt = output;            /* likewise */
 294     list = listgen;             /* and again */
 295
 296     switch (instruction->opcode) {
 297     case -1:
 298         return 0;
 299     case I_DB:
 300         wsize = 1;
 301         break;
 302     case I_DW:
 303         wsize = 2;
 304         break;
 305     case I_DD:
 306         wsize = 4;
 307         break;
 308     case I_DQ:
 309         wsize = 8;
 310         break;
 311     case I_DT:
 312         wsize = 10;
 313         break;
 314     case I_DO:
 315         wsize = 16;
 316         break;
 317     case I_DY:
 318         wsize = 32;
 319         break;
 320     default:
 321         break;
 322     }
 323
 324     if (wsize) {
 325         extop *e;
 326         int32_t t = instruction->times;
 327         if (t < 0)
 328             errfunc(ERR_PANIC,
 329                     "instruction->times < 0 (%ld) in assemble()", t);
 330
 331         while (t--) {           /* repeat TIMES times */
 332             for (e = instruction->eops; e; e = e->next) {
 333                 if (e->type == EOT_DB_NUMBER) {
 334                     if (wsize == 1) {
 335                         if (e->segment != NO_SEG)
 336                             errfunc(ERR_NONFATAL,
 337                                     "one-byte relocation attempted");
 338                         else {
 339                             uint8_t out_byte = e->offset;
 340                             out(offset, segment, &out_byte,
 341                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 342                         }
 343                     } else if (wsize > 8) {
 344                         errfunc(ERR_NONFATAL,
 345                                 "integer supplied to a DT, DO or DY"
 346                                 " instruction");
 347                     } else
 348                         out(offset, segment, &e->offset,
 349                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 350                     offset += wsize;
 351                 } else if (e->type == EOT_DB_STRING ||
 352                            e->type == EOT_DB_STRING_FREE) {
 353                     int align;
 354
 355                     out(offset, segment, e->stringval,
 356                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 357                     align = e->stringlen % wsize;
 358
 359                     if (align) {
 360                         align = wsize - align;
 361                         out(offset, segment, zero_buffer,
 362                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 363                     }
 364                     offset += e->stringlen + align;
 365                 }
 366             }
 367             if (t > 0 && t == instruction->times - 1) {
 368                 /*
 369                  * Dummy call to list->output to give the offset to the
 370                  * listing module.
 371                  */
 372                 list->output(offset, NULL, OUT_RAWDATA, 0);
 373                 list->uplevel(LIST_TIMES);
 374             }
 375         }
 376         if (instruction->times > 1)
 377             list->downlevel(LIST_TIMES);
 378         return offset - start;
 379     }
 380
 381     if (instruction->opcode == I_INCBIN) {
 382         const char *fname = instruction->eops->stringval;
 383         FILE *fp;
 384
 385         fp = fopen(fname, "rb");
 386         if (!fp) {
 387             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 388                   fname);
 389         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 390             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 391                   fname);
 392         } else {
 393             static char buf[4096];
 394             size_t t = instruction->times;
 395             size_t base = 0;
 396             size_t len;
 397
 398             len = ftell(fp);
 399             if (instruction->eops->next) {
 400                 base = instruction->eops->next->offset;
 401                 len -= base;
 402                 if (instruction->eops->next->next &&
 403                     len > (size_t)instruction->eops->next->next->offset)
 404                     len = (size_t)instruction->eops->next->next->offset;
 405             }
 406             /*
 407              * Dummy call to list->output to give the offset to the
 408              * listing module.
 409              */
 410             list->output(offset, NULL, OUT_RAWDATA, 0);
 411             list->uplevel(LIST_INCBIN);
 412             while (t--) {
 413                 size_t l;
 414
 415                 fseek(fp, base, SEEK_SET);
 416                 l = len;
 417                 while (l > 0) {
 418                     int32_t m =
 419                         fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
 420                               fp);
 421                     if (!m) {
 422                         /*
 423                          * This shouldn't happen unless the file
 424                          * actually changes while we are reading
 425                          * it.
 426                          */
 427                         error(ERR_NONFATAL,
 428                               "`incbin': unexpected EOF while"
 429                               " reading file `%s'", fname);
 430                         t = 0;  /* Try to exit cleanly */
 431                         break;
 432                     }
 433                     out(offset, segment, buf, OUT_RAWDATA, m,
 434                         NO_SEG, NO_SEG);
 435                     l -= m;
 436                 }
 437             }
 438             list->downlevel(LIST_INCBIN);
 439             if (instruction->times > 1) {
 440                 /*
 441                  * Dummy call to list->output to give the offset to the
 442                  * listing module.
 443                  */
 444                 list->output(offset, NULL, OUT_RAWDATA, 0);
 445                 list->uplevel(LIST_TIMES);
 446                 list->downlevel(LIST_TIMES);
 447             }
 448             fclose(fp);
 449             return instruction->times * len;
 450         }
 451         return 0;               /* if we're here, there's an error */
 452     }
 453
 454     /* Check to see if we need an address-size prefix */
 455     add_asp(instruction, bits);
 456
 457     size_prob = 0;
 458
 459     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 460         int m = matches(temp, instruction, bits);
 461         if (m == 100 ||
 462             (m == 99 && jmp_match(segment, offset, bits,
 463                                   instruction, temp->code))) {
 464             /* Matches! */
 465             int64_t insn_size = calcsize(segment, offset, bits,
 466                                       instruction, temp->code);
 467             itimes = instruction->times;
 468             if (insn_size < 0)  /* shouldn't be, on pass two */
 469                 error(ERR_PANIC, "errors made it through from pass one");
 470             else
 471                 while (itimes--) {
 472                     for (j = 0; j < MAXPREFIX; j++) {
 473                         uint8_t c = 0;
 474                         switch (instruction->prefixes[j]) {
 475                         case P_WAIT:
 476                             c = 0x9B;
 477                             break;
 478                         case P_LOCK:
 479                             c = 0xF0;
 480                             break;
 481                         case P_REPNE:
 482                         case P_REPNZ:
 483                             c = 0xF2;
 484                             break;
 485                         case P_REPE:
 486                         case P_REPZ:
 487                         case P_REP:
 488                             c = 0xF3;
 489                             break;
 490                         case R_CS:
 491                             if (bits == 64) {
 492                                 error(ERR_WARNING | ERR_PASS2,
 493                                       "cs segment base generated, but will be ignored in 64-bit mode");
 494                             }
 495                             c = 0x2E;
 496                             break;
 497                         case R_DS:
 498                             if (bits == 64) {
 499                                 error(ERR_WARNING | ERR_PASS2,
 500                                       "ds segment base generated, but will be ignored in 64-bit mode");
 501                             }
 502                             c = 0x3E;
 503                             break;
 504                         case R_ES:
 505                            if (bits == 64) {
 506                                 error(ERR_WARNING | ERR_PASS2,
 507                                       "es segment base generated, but will be ignored in 64-bit mode");
 508                            }
 509                             c = 0x26;
 510                             break;
 511                         case R_FS:
 512                             c = 0x64;
 513                             break;
 514                         case R_GS:
 515                             c = 0x65;
 516                             break;
 517                         case R_SS:
 518                             if (bits == 64) {
 519                                 error(ERR_WARNING | ERR_PASS2,
 520                                       "ss segment base generated, but will be ignored in 64-bit mode");
 521                             }
 522                             c = 0x36;
 523                             break;
 524                         case R_SEGR6:
 525                         case R_SEGR7:
 526                             error(ERR_NONFATAL,
 527                                   "segr6 and segr7 cannot be used as prefixes");
 528                             break;
 529                         case P_A16:
 530                             if (bits == 64) {
 531                                 error(ERR_NONFATAL,
 532                                       "16-bit addressing is not supported "
 533                                       "in 64-bit mode");
 534                             } else if (bits != 16)
 535                                 c = 0x67;
 536                             break;
 537                         case P_A32:
 538                             if (bits != 32)
 539                                 c = 0x67;
 540                             break;
 541                         case P_A64:
 542                             if (bits != 64) {
 543                                 error(ERR_NONFATAL,
 544                                       "64-bit addressing is only supported "
 545                                       "in 64-bit mode");
 546                             }
 547                             break;
 548                         case P_ASP:
 549                             c = 0x67;
 550                             break;
 551                         case P_O16:
 552                             if (bits != 16)
 553                                 c = 0x66;
 554                             break;
 555                         case P_O32:
 556                             if (bits == 16)
 557                                 c = 0x66;
 558                             break;
 559                         case P_O64:
 560                             /* REX.W */
 561                             break;
 562                         case P_OSP:
 563                             c = 0x66;
 564                             break;
 565                         case P_none:
 566                             break;
 567                         default:
 568                             error(ERR_PANIC, "invalid instruction prefix");
 569                         }
 570                         if (c != 0) {
 571                             out(offset, segment, &c, OUT_RAWDATA, 1,
 572                                 NO_SEG, NO_SEG);
 573                             offset++;
 574                         }
 575                     }
 576                     insn_end = offset + insn_size;
 577                     gencode(segment, offset, bits, instruction,
 578                             temp, insn_end);
 579                     offset += insn_size;
 580                     if (itimes > 0 && itimes == instruction->times - 1) {
 581                         /*
 582                          * Dummy call to list->output to give the offset to the
 583                          * listing module.
 584                          */
 585                         list->output(offset, NULL, OUT_RAWDATA, 0);
 586                         list->uplevel(LIST_TIMES);
 587                     }
 588                 }
 589             if (instruction->times > 1)
 590                 list->downlevel(LIST_TIMES);
 591             return offset - start;
 592         } else if (m > 0 && m > size_prob) {
 593             size_prob = m;
 594         }
 595     }
 596
 597     if (temp->opcode == -1) {   /* didn't match any instruction */
 598         switch (size_prob) {
 599         case 1:
 600             error(ERR_NONFATAL, "operation size not specified");
 601             break;
 602         case 2:
 603             error(ERR_NONFATAL, "mismatch in operand sizes");
 604             break;
 605         case 3:
 606             error(ERR_NONFATAL, "no instruction for this cpu level");
 607             break;
 608         case 4:
 609             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 610                   bits);
 611             break;
 612         default:
 613             error(ERR_NONFATAL,
 614                   "invalid combination of opcode and operands");
 615             break;
 616         }
 617     }
 618     return 0;
 619 }
 620
 621 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 622                insn * instruction, efunc error)
 623 {
 624     const struct itemplate *temp;
 625
 626     errfunc = error;            /* to pass to other functions */
 627     cpu = cp;
 628
 629     if (instruction->opcode == -1)
 630         return 0;
 631
 632     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 633         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 634         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 635         instruction->opcode == I_DY) {
 636         extop *e;
 637         int32_t isize, osize, wsize = 0;   /* placate gcc */
 638
 639         isize = 0;
 640         switch (instruction->opcode) {
 641         case I_DB:
 642             wsize = 1;
 643             break;
 644         case I_DW:
 645             wsize = 2;
 646             break;
 647         case I_DD:
 648             wsize = 4;
 649             break;
 650         case I_DQ:
 651             wsize = 8;
 652             break;
 653         case I_DT:
 654             wsize = 10;
 655             break;
 656         case I_DO:
 657             wsize = 16;
 658             break;
 659         case I_DY:
 660             wsize = 32;
 661             break;
 662         default:
 663             break;
 664         }
 665
 666         for (e = instruction->eops; e; e = e->next) {
 667             int32_t align;
 668
 669             osize = 0;
 670             if (e->type == EOT_DB_NUMBER)
 671                 osize = 1;
 672             else if (e->type == EOT_DB_STRING ||
 673                      e->type == EOT_DB_STRING_FREE)
 674                 osize = e->stringlen;
 675
 676             align = (-osize) % wsize;
 677             if (align < 0)
 678                 align += wsize;
 679             isize += osize + align;
 680         }
 681         return isize * instruction->times;
 682     }
 683
 684     if (instruction->opcode == I_INCBIN) {
 685         const char *fname = instruction->eops->stringval;
 686         FILE *fp;
 687         size_t len;
 688
 689         fp = fopen(fname, "rb");
 690         if (!fp)
 691             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 692                   fname);
 693         else if (fseek(fp, 0L, SEEK_END) < 0)
 694             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 695                   fname);
 696         else {
 697             len = ftell(fp);
 698             fclose(fp);
 699             if (instruction->eops->next) {
 700                 len -= instruction->eops->next->offset;
 701                 if (instruction->eops->next->next &&
 702                     len > (size_t)instruction->eops->next->next->offset) {
 703                     len = (size_t)instruction->eops->next->next->offset;
 704                 }
 705             }
 706             return instruction->times * len;
 707         }
 708         return 0;               /* if we're here, there's an error */
 709     }
 710
 711     /* Check to see if we need an address-size prefix */
 712     add_asp(instruction, bits);
 713
 714     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 715         int m = matches(temp, instruction, bits);
 716         if (m == 100 ||
 717             (m == 99 && jmp_match(segment, offset, bits,
 718                                   instruction, temp->code))) {
 719             /* we've matched an instruction. */
 720             int64_t isize;
 721             const uint8_t *codes = temp->code;
 722             int j;
 723
 724             isize = calcsize(segment, offset, bits, instruction, codes);
 725             if (isize < 0)
 726                 return -1;
 727             for (j = 0; j < MAXPREFIX; j++) {
 728                 switch (instruction->prefixes[j]) {
 729                 case P_A16:
 730                     if (bits != 16)
 731                         isize++;
 732                     break;
 733                 case P_A32:
 734                     if (bits != 32)
 735                         isize++;
 736                     break;
 737                 case P_O16:
 738                     if (bits != 16)
 739                         isize++;
 740                     break;
 741                 case P_O32:
 742                     if (bits == 16)
 743                         isize++;
 744                     break;
 745                 case P_A64:
 746                 case P_O64:
 747                 case P_none:
 748                     break;
 749                 default:
 750                     isize++;
 751                     break;
 752                 }
 753             }
 754             return isize * instruction->times;
 755         }
 756     }
 757     return -1;                  /* didn't match any instruction */
 758 }
 759
 760 static bool possible_sbyte(operand *o)
 761 {
 762     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 763         !(o->opflags & OPFLAG_UNKNOWN) &&
 764         optimizing >= 0 && !(o->type & STRICT);
 765 }
 766
 767 /* check that opn[op]  is a signed byte of size 16 or 32 */
 768 static bool is_sbyte16(operand *o)
 769 {
 770     int16_t v;
 771
 772     if (!possible_sbyte(o))
 773         return false;
 774
 775     v = o->offset;
 776     return v >= -128 && v <= 127;
 777 }
 778
 779 static bool is_sbyte32(operand *o)
 780 {
 781     int32_t v;
 782
 783     if (!possible_sbyte(o))
 784         return false;
 785
 786     v = o->offset;
 787     return v >= -128 && v <= 127;
 788 }
 789
 790 /* Common construct */
 791 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 792
 793 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 794                         insn * ins, const uint8_t *codes)
 795 {
 796     int64_t length = 0;
 797     uint8_t c;
 798     int rex_mask = ~0;
 799     int op1, op2;
 800     struct operand *opx;
 801     uint8_t opex = 0;
 802
 803     ins->rex = 0;               /* Ensure REX is reset */
 804
 805     if (ins->prefixes[PPS_OSIZE] == P_O64)
 806         ins->rex |= REX_W;
 807
 808     (void)segment;              /* Don't warn that this parameter is unused */
 809     (void)offset;               /* Don't warn that this parameter is unused */
 810
 811     while (*codes) {
 812         c = *codes++;
 813         op1 = (c & 3) + ((opex & 1) << 2);
 814         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 815         opx = &ins->oprs[op1];
 816         opex = 0;               /* For the next iteration */
 817
 818         switch (c) {
 819         case 01:
 820         case 02:
 821         case 03:
 822         case 04:
 823             codes += c, length += c;
 824             break;
 825
 826         case 05:
 827         case 06:
 828         case 07:
 829             opex = c;
 830             break;
 831
 832         case4(010):
 833             ins->rex |=
 834                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 835             codes++, length++;
 836             break;
 837
 838         case4(014):
 839         case4(020):
 840         case4(024):
 841             length++;
 842             break;
 843
 844         case4(030):
 845             length += 2;
 846             break;
 847
 848         case4(034):
 849             if (opx->type & (BITS16 | BITS32 | BITS64))
 850                 length += (opx->type & BITS16) ? 2 : 4;
 851             else
 852                 length += (bits == 16) ? 2 : 4;
 853             break;
 854
 855         case4(040):
 856             length += 4;
 857             break;
 858
 859         case4(044):
 860             length += ins->addr_size >> 3;
 861             break;
 862
 863         case4(050):
 864             length++;
 865             break;
 866
 867         case4(054):
 868             length += 8; /* MOV reg64/imm */
 869             break;
 870
 871         case4(060):
 872             length += 2;
 873             break;
 874
 875         case4(064):
 876             if (opx->type & (BITS16 | BITS32 | BITS64))
 877                 length += (opx->type & BITS16) ? 2 : 4;
 878             else
 879                 length += (bits == 16) ? 2 : 4;
 880             break;
 881
 882         case4(070):
 883             length += 4;
 884             break;
 885
 886         case4(074):
 887             length += 2;
 888             break;
 889
 890         case4(0140):
 891             length += is_sbyte16(opx) ? 1 : 2;
 892             break;
 893
 894         case4(0144):
 895             codes++;
 896             length++;
 897             break;
 898
 899         case4(0150):
 900             length += is_sbyte32(opx) ? 1 : 4;
 901             break;
 902
 903         case4(0154):
 904             codes++;
 905             length++;
 906             break;
 907
 908         case4(0160):
 909             length++;
 910             ins->rex |= REX_D;
 911             ins->drexdst = regval(opx);
 912             break;
 913
 914         case4(0164):
 915             length++;
 916             ins->rex |= REX_D|REX_OC;
 917             ins->drexdst = regval(opx);
 918             break;
 919
 920         case 0171:
 921             break;
 922
 923         case 0172:
 924         case 0173:
 925         case 0174:
 926             codes++;
 927             length++;
 928             break;
 929
 930         case4(0250):
 931             length += is_sbyte32(opx) ? 1 : 4;
 932             break;
 933
 934         case4(0254):
 935             length += 4;
 936             break;
 937
 938         case4(0260):
 939             ins->rex |= REX_V;
 940             ins->drexdst = regval(opx);
 941             ins->vex_cm = *codes++;
 942             ins->vex_wlp = *codes++;
 943             break;
 944
 945         case 0270:
 946             ins->rex |= REX_V;
 947             ins->drexdst = 0;
 948             ins->vex_cm = *codes++;
 949             ins->vex_wlp = *codes++;
 950             break;
 951
 952         case4(0274):
 953             length++;
 954             break;
 955
 956         case4(0300):
 957             break;
 958
 959         case 0310:
 960             if (bits == 64)
 961                 return -1;
 962             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 963             break;
 964
 965         case 0311:
 966             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 967             break;
 968
 969         case 0312:
 970             break;
 971
 972         case 0313:
 973             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 974                 has_prefix(ins, PPS_ASIZE, P_A32))
 975                 return -1;
 976             break;
 977
 978         case4(0314):
 979             break;
 980
 981         case 0320:
 982             length += (bits != 16);
 983             break;
 984
 985         case 0321:
 986             length += (bits == 16);
 987             break;
 988
 989         case 0322:
 990             break;
 991
 992         case 0323:
 993             rex_mask &= ~REX_W;
 994             break;
 995
 996         case 0324:
 997             ins->rex |= REX_W;
 998             break;
 999
1000         case 0325:
1001             ins->rex |= REX_NH;
1002             break;
1003
1004         case 0330:
1005             codes++, length++;
1006             break;
1007
1008         case 0331:
1009             break;
1010
1011         case 0332:
1012         case 0333:
1013             length++;
1014             break;
1015
1016         case 0334:
1017             ins->rex |= REX_L;
1018             break;
1019
1020         case 0335:
1021             break;
1022
1023         case 0336:
1024             if (!ins->prefixes[PPS_LREP])
1025                 ins->prefixes[PPS_LREP] = P_REP;
1026             break;
1027
1028         case 0337:
1029             if (!ins->prefixes[PPS_LREP])
1030                 ins->prefixes[PPS_LREP] = P_REPNE;
1031             break;
1032
1033         case 0340:
1034             if (ins->oprs[0].segment != NO_SEG)
1035                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1036                         " quantity of BSS space");
1037             else
1038                 length += ins->oprs[0].offset;
1039             break;
1040
1041         case 0341:
1042             if (!ins->prefixes[PPS_WAIT])
1043                 ins->prefixes[PPS_WAIT] = P_WAIT;
1044             break;
1045
1046         case4(0344):
1047             length++;
1048             break;
1049
1050         case 0360:
1051             break;
1052
1053         case 0361:
1054         case 0362:
1055         case 0363:
1056             length++;
1057             break;
1058
1059         case 0364:
1060         case 0365:
1061             break;
1062
1063         case 0366:
1064         case 0367:
1065             length++;
1066             break;
1067
1068         case 0370:
1069         case 0371:
1070         case 0372:
1071             break;
1072
1073         case 0373:
1074             length++;
1075             break;
1076
1077         case4(0100):
1078         case4(0110):
1079         case4(0120):
1080         case4(0130):
1081         case4(0200):
1082         case4(0204):
1083         case4(0210):
1084         case4(0214):
1085         case4(0220):
1086         case4(0224):
1087         case4(0230):
1088         case4(0234):
1089             {
1090                 ea ea_data;
1091                 int rfield;
1092                 int32_t rflags;
1093                 struct operand *opy = &ins->oprs[op2];
1094
1095                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1096
1097                 if (c <= 0177) {
1098                     /* pick rfield from operand b (opx) */
1099                     rflags = regflag(opx);
1100                     rfield = nasm_regvals[opx->basereg];
1101                 } else {
1102                     rflags = 0;
1103                     rfield = c & 7;
1104                 }
1105                 if (!process_ea(opy, &ea_data, bits,
1106                                 ins->addr_size, rfield, rflags)) {
1107                     errfunc(ERR_NONFATAL, "invalid effective address");
1108                     return -1;
1109                 } else {
1110                     ins->rex |= ea_data.rex;
1111                     length += ea_data.size;
1112                 }
1113             }
1114             break;
1115
1116         default:
1117             errfunc(ERR_PANIC, "internal instruction table corrupt"
1118                     ": instruction code \\%o (0x%02X) given", c, c);
1119             break;
1120         }
1121     }
1122
1123     ins->rex &= rex_mask;
1124
1125     if (ins->rex & REX_NH) {
1126         if (ins->rex & REX_H) {
1127             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1128             return -1;
1129         }
1130         ins->rex &= ~REX_P;     /* Don't force REX prefix due to high reg */
1131     }
1132
1133     if (ins->rex & REX_V) {
1134         int bad32 = REX_R|REX_W|REX_X|REX_B;
1135
1136         if (ins->rex & REX_H) {
1137             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1138             return -1;
1139         }
1140         switch (ins->vex_wlp & 030) {
1141         case 000:
1142         case 020:
1143             ins->rex &= ~REX_W;
1144             break;
1145         case 010:
1146             ins->rex |= REX_W;
1147             bad32 &= ~REX_W;
1148             break;
1149         case 030:
1150             /* Follow REX_W */
1151             break;
1152         }
1153
1154         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1155             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1156             return -1;
1157         }
1158         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1159             length += 3;
1160         else
1161             length += 2;
1162     } else if (ins->rex & REX_D) {
1163         if (ins->rex & REX_H) {
1164             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1165             return -1;
1166         }
1167         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1168                            ins->drexdst > 7)) {
1169             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1170             return -1;
1171         }
1172         length++;
1173     } else if (ins->rex & REX_REAL) {
1174         if (ins->rex & REX_H) {
1175             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1176             return -1;
1177         } else if (bits == 64) {
1178             length++;
1179         } else if ((ins->rex & REX_L) &&
1180                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1181                    cpu >= IF_X86_64) {
1182             /* LOCK-as-REX.R */
1183             assert_no_prefix(ins, PPS_LREP);
1184             length++;
1185         } else {
1186             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1187             return -1;
1188         }
1189     }
1190
1191     return length;
1192 }
1193
/*
 * Emit a pending REX prefix byte, if one is needed.
 *
 * Expands in a scope that provides `ins`, `bits`, `offset` and `segment`.
 * Nothing happens unless bits == 64, ins->rex has at least one REX_REAL
 * bit set, and neither REX_D nor REX_V is set.  Otherwise ins->rex is
 * reduced to its REX_REAL bits with REX_P or'ed in, one byte is written
 * from it through out(), ins->rex is cleared, and offset advances by one.
 *
 * The body is wrapped in do { } while (0) so that `EMIT_REX();` is exactly
 * one statement; a bare `if { }` expansion would break when used as the
 * body of an unbraced if that has an else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) &&                              \
            (ins->rex & REX_REAL) && (bits == 64)) {                    \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1201
1202 static void gencode(int32_t segment, int64_t offset, int bits,
1203                     insn * ins, const struct itemplate *temp,
1204                     int64_t insn_end)
1205 {
1206     static char condval[] = {   /* conditional opcodes */
1207         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1208         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1209         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1210     };
1211     uint8_t c;
1212     uint8_t bytes[4];
1213     int64_t size;
1214     int64_t data;
1215     int op1, op2;
1216     struct operand *opx;
1217     const uint8_t *codes = temp->code;
1218     uint8_t opex = 0;
1219
1220     while (*codes) {
1221         c = *codes++;
1222         op1 = (c & 3) + ((opex & 1) << 2);
1223         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1224         opx = &ins->oprs[op1];
1225         opex = 0;               /* For the next iteration */
1226
1227         switch (c) {
1228         case 01:
1229         case 02:
1230         case 03:
1231         case 04:
1232             EMIT_REX();
1233             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1234             codes += c;
1235             offset += c;
1236             break;
1237
1238         case 05:
1239         case 06:
1240         case 07:
1241             opex = c;
1242             break;
1243
1244         case4(010):
1245             EMIT_REX();
1246             bytes[0] = *codes++ + (regval(opx) & 7);
1247             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1248             offset += 1;
1249             break;
1250
1251         case4(014):
1252             /* The test for BITS8 and SBYTE here is intended to avoid
1253                warning on optimizer actions due to SBYTE, while still
1254                warn on explicit BYTE directives.  Also warn, obviously,
1255                if the optimizer isn't enabled. */
1256             if (((opx->type & BITS8) ||
1257                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1258                 (opx->offset < -128 || opx->offset > 127)) {
1259                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1260                         "signed byte value exceeds bounds");
1261             }
1262             if (opx->segment != NO_SEG) {
1263                 data = opx->offset;
1264                 out(offset, segment, &data, OUT_ADDRESS, 1,
1265                     opx->segment, opx->wrt);
1266             } else {
1267                 bytes[0] = opx->offset;
1268                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1269                     NO_SEG);
1270             }
1271             offset += 1;
1272             break;
1273
1274         case4(020):
1275             if (opx->offset < -256 || opx->offset > 255) {
1276                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1277                         "byte value exceeds bounds");
1278             }
1279             if (opx->segment != NO_SEG) {
1280                 data = opx->offset;
1281                 out(offset, segment, &data, OUT_ADDRESS, 1,
1282                     opx->segment, opx->wrt);
1283             } else {
1284                 bytes[0] = opx->offset;
1285                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1286                     NO_SEG);
1287             }
1288             offset += 1;
1289             break;
1290
1291         case4(024):
1292             if (opx->offset < 0 || opx->offset > 255)
1293                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1294                         "unsigned byte value exceeds bounds");
1295             if (opx->segment != NO_SEG) {
1296                 data = opx->offset;
1297                 out(offset, segment, &data, OUT_ADDRESS, 1,
1298                     opx->segment, opx->wrt);
1299             } else {
1300                 bytes[0] = opx->offset;
1301                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1302                     NO_SEG);
1303             }
1304             offset += 1;
1305             break;
1306
1307         case4(030):
1308             warn_overflow(2, opx);
1309             data = opx->offset;
1310             out(offset, segment, &data, OUT_ADDRESS, 2,
1311                 opx->segment, opx->wrt);
1312             offset += 2;
1313             break;
1314
1315         case4(034):
1316             if (opx->type & (BITS16 | BITS32))
1317                 size = (opx->type & BITS16) ? 2 : 4;
1318             else
1319                 size = (bits == 16) ? 2 : 4;
1320             warn_overflow(size, opx);
1321             data = opx->offset;
1322             out(offset, segment, &data, OUT_ADDRESS, size,
1323                 opx->segment, opx->wrt);
1324             offset += size;
1325             break;
1326
1327         case4(040):
1328             warn_overflow(4, opx);
1329             data = opx->offset;
1330             out(offset, segment, &data, OUT_ADDRESS, 4,
1331                 opx->segment, opx->wrt);
1332             offset += 4;
1333             break;
1334
1335         case4(044):
1336             data = opx->offset;
1337             size = ins->addr_size >> 3;
1338             warn_overflow(size, opx);
1339             out(offset, segment, &data, OUT_ADDRESS, size,
1340                 opx->segment, opx->wrt);
1341             offset += size;
1342             break;
1343
1344         case4(050):
1345             if (opx->segment != segment)
1346                 errfunc(ERR_NONFATAL,
1347                         "short relative jump outside segment");
1348             data = opx->offset - insn_end;
1349             if (data > 127 || data < -128)
1350                 errfunc(ERR_NONFATAL, "short jump is out of range");
1351             bytes[0] = data;
1352             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1353             offset += 1;
1354             break;
1355
1356         case4(054):
1357             data = (int64_t)opx->offset;
1358             out(offset, segment, &data, OUT_ADDRESS, 8,
1359                 opx->segment, opx->wrt);
1360             offset += 8;
1361             break;
1362
1363         case4(060):
1364             if (opx->segment != segment) {
1365                 data = opx->offset;
1366                 out(offset, segment, &data,
1367                     OUT_REL2ADR, insn_end - offset,
1368                     opx->segment, opx->wrt);
1369             } else {
1370                 data = opx->offset - insn_end;
1371                 out(offset, segment, &data,
1372                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1373             }
1374             offset += 2;
1375             break;
1376
1377         case4(064):
1378             if (opx->type & (BITS16 | BITS32 | BITS64))
1379                 size = (opx->type & BITS16) ? 2 : 4;
1380             else
1381                 size = (bits == 16) ? 2 : 4;
1382             if (opx->segment != segment) {
1383                 data = opx->offset;
1384                 out(offset, segment, &data,
1385                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1386                     insn_end - offset, opx->segment, opx->wrt);
1387             } else {
1388                 data = opx->offset - insn_end;
1389                 out(offset, segment, &data,
1390                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1391             }
1392             offset += size;
1393             break;
1394
1395         case4(070):
1396             if (opx->segment != segment) {
1397                 data = opx->offset;
1398                 out(offset, segment, &data,
1399                     OUT_REL4ADR, insn_end - offset,
1400                     opx->segment, opx->wrt);
1401             } else {
1402                 data = opx->offset - insn_end;
1403                 out(offset, segment, &data,
1404                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1405             }
1406             offset += 4;
1407             break;
1408
1409         case4(074):
1410             if (opx->segment == NO_SEG)
1411                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1412                         " relocatable");
1413             data = 0;
1414             out(offset, segment, &data, OUT_ADDRESS, 2,
1415                 outfmt->segbase(1 + opx->segment),
1416                 opx->wrt);
1417             offset += 2;
1418             break;
1419
1420         case4(0140):
1421             data = opx->offset;
1422             warn_overflow(2, opx);
1423             if (is_sbyte16(opx)) {
1424                 bytes[0] = data;
1425                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1426                     NO_SEG);
1427                 offset++;
1428             } else {
1429                 out(offset, segment, &data, OUT_ADDRESS, 2,
1430                     opx->segment, opx->wrt);
1431                 offset += 2;
1432             }
1433             break;
1434
1435         case4(0144):
1436             EMIT_REX();
1437             bytes[0] = *codes++;
1438             if (is_sbyte16(opx))
1439                 bytes[0] |= 2;  /* s-bit */
1440             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1441             offset++;
1442             break;
1443
1444         case4(0150):
1445             data = opx->offset;
1446             warn_overflow(4, opx);
1447             if (is_sbyte32(opx)) {
1448                 bytes[0] = data;
1449                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1450                     NO_SEG);
1451                 offset++;
1452             } else {
1453                 out(offset, segment, &data, OUT_ADDRESS, 4,
1454                     opx->segment, opx->wrt);
1455                 offset += 4;
1456             }
1457             break;
1458
1459         case4(0154):
1460             EMIT_REX();
1461             bytes[0] = *codes++;
1462             if (is_sbyte32(opx))
1463                 bytes[0] |= 2;  /* s-bit */
1464             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1465             offset++;
1466             break;
1467
1468         case4(0160):
1469         case4(0164):
1470             break;
1471
1472         case 0171:
1473             bytes[0] =
1474                 (ins->drexdst << 4) |
1475                 (ins->rex & REX_OC ? 0x08 : 0) |
1476                 (ins->rex & (REX_R|REX_X|REX_B));
1477             ins->rex = 0;
1478             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1479             offset++;
1480             break;
1481
1482         case 0172:
1483             c = *codes++;
1484             opx = &ins->oprs[c >> 3];
1485             bytes[0] = nasm_regvals[opx->basereg] << 4;
1486             opx = &ins->oprs[c & 7];
1487             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1488                 errfunc(ERR_NONFATAL,
1489                         "non-absolute expression not permitted as argument %d",
1490                         c & 7);
1491             } else {
1492                 if (opx->offset & ~15) {
1493                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1494                             "four-bit argument exceeds bounds");
1495                 }
1496                 bytes[0] |= opx->offset & 15;
1497             }
1498             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1499             offset++;
1500             break;
1501
1502         case 0173:
1503             c = *codes++;
1504             opx = &ins->oprs[c >> 4];
1505             bytes[0] = nasm_regvals[opx->basereg] << 4;
1506             bytes[0] |= c & 15;
1507             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1508             offset++;
1509             break;
1510
1511         case 0174:
1512             c = *codes++;
1513             opx = &ins->oprs[c];
1514             bytes[0] = nasm_regvals[opx->basereg] << 4;
1515             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1516             offset++;
1517             break;
1518
1519         case4(0250):
1520             data = opx->offset;
1521             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1522                 (int32_t)data != (int64_t)data) {
1523                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1524                         "signed dword immediate exceeds bounds");
1525             }
1526             if (is_sbyte32(opx)) {
1527                 bytes[0] = data;
1528                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1529                     NO_SEG);
1530                 offset++;
1531             } else {
1532                 out(offset, segment, &data, OUT_ADDRESS, 4,
1533                     opx->segment, opx->wrt);
1534                 offset += 4;
1535             }
1536             break;
1537
1538         case4(0254):
1539             data = opx->offset;
1540             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1541                 (int32_t)data != (int64_t)data) {
1542                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1543                         "signed dword immediate exceeds bounds");
1544             }
1545             out(offset, segment, &data, OUT_ADDRESS, 4,
1546                 opx->segment, opx->wrt);
1547             offset += 4;
1548             break;
1549
1550         case4(0260):
1551         case 0270:
1552             codes += 2;
1553             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1554                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1555                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1556                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1557                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1558                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1559                 offset += 3;
1560             } else {
1561                 bytes[0] = 0xc5;
1562                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1563                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1564                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1565                 offset += 2;
1566             }
1567             break;
1568
1569         case4(0274):
1570         {
1571             uint64_t uv, um;
1572             int s;
1573
1574             if (ins->rex & REX_W)
1575                 s = 64;
1576             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1577                 s = 16;
1578             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1579                 s = 32;
1580             else
1581                 s = bits;
1582
1583             um = (uint64_t)2 << (s-1);
1584             uv = opx->offset;
1585
1586             if (uv > 127 && uv < (uint64_t)-128 &&
1587                 (uv < um-128 || uv > um-1)) {
1588                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1589                         "signed byte value exceeds bounds");
1590             }
1591             if (opx->segment != NO_SEG) {
1592                 data = uv;
1593                 out(offset, segment, &data, OUT_ADDRESS, 1,
1594                     opx->segment, opx->wrt);
1595             } else {
1596                 bytes[0] = uv;
1597                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1598                     NO_SEG);
1599             }
1600             offset += 1;
1601             break;
1602         }
1603
1604         case4(0300):
1605             break;
1606
1607         case 0310:
1608             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1609                 *bytes = 0x67;
1610                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1611                 offset += 1;
1612             } else
1613                 offset += 0;
1614             break;
1615
1616         case 0311:
1617             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1618                 *bytes = 0x67;
1619                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1620                 offset += 1;
1621             } else
1622                 offset += 0;
1623             break;
1624
1625         case 0312:
1626             break;
1627
1628         case 0313:
1629             ins->rex = 0;
1630             break;
1631
1632         case4(0314):
1633             break;
1634
1635         case 0320:
1636             if (bits != 16) {
1637                 *bytes = 0x66;
1638                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1639                 offset += 1;
1640             } else
1641                 offset += 0;
1642             break;
1643
1644         case 0321:
1645             if (bits == 16) {
1646                 *bytes = 0x66;
1647                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1648                 offset += 1;
1649             } else
1650                 offset += 0;
1651             break;
1652
1653         case 0322:
1654         case 0323:
1655             break;
1656
1657         case 0324:
1658             ins->rex |= REX_W;
1659             break;
1660
1661         case 0325:
1662             break;
1663
1664         case 0330:
1665             *bytes = *codes++ ^ condval[ins->condition];
1666             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1667             offset += 1;
1668             break;
1669
1670         case 0331:
1671             break;
1672
1673         case 0332:
1674         case 0333:
1675             *bytes = c - 0332 + 0xF2;
1676             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1677             offset += 1;
1678             break;
1679
1680         case 0334:
1681             if (ins->rex & REX_R) {
1682                 *bytes = 0xF0;
1683                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1684                 offset += 1;
1685             }
1686             ins->rex &= ~(REX_L|REX_R);
1687             break;
1688
1689         case 0335:
1690             break;
1691
1692         case 0336:
1693         case 0337:
1694             break;
1695
1696         case 0340:
1697             if (ins->oprs[0].segment != NO_SEG)
1698                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1699             else {
1700                 int64_t size = ins->oprs[0].offset;
1701                 if (size > 0)
1702                     out(offset, segment, NULL,
1703                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1704                 offset += size;
1705             }
1706             break;
1707
1708         case 0341:
1709             break;
1710
1711         case 0344:
1712         case 0345:
1713             bytes[0] = c & 1;
1714             switch (ins->oprs[0].basereg) {
1715             case R_CS:
1716                 bytes[0] += 0x0E;
1717                 break;
1718             case R_DS:
1719                 bytes[0] += 0x1E;
1720                 break;
1721             case R_ES:
1722                 bytes[0] += 0x06;
1723                 break;
1724             case R_SS:
1725                 bytes[0] += 0x16;
1726                 break;
1727             default:
1728                 errfunc(ERR_PANIC,
1729                         "bizarre 8086 segment register received");
1730             }
1731             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1732             offset++;
1733             break;
1734
1735         case 0346:
1736         case 0347:
1737             bytes[0] = c & 1;
1738             switch (ins->oprs[0].basereg) {
1739             case R_FS:
1740                 bytes[0] += 0xA0;
1741                 break;
1742             case R_GS:
1743                 bytes[0] += 0xA8;
1744                 break;
1745             default:
1746                 errfunc(ERR_PANIC,
1747                         "bizarre 386 segment register received");
1748             }
1749             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1750             offset++;
1751             break;
1752
1753         case 0360:
1754             break;
1755
1756         case 0361:
1757             bytes[0] = 0x66;
1758             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1759             offset += 1;
1760             break;
1761
1762         case 0362:
1763         case 0363:
1764             bytes[0] = c - 0362 + 0xf2;
1765             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1766             offset += 1;
1767             break;
1768
1769         case 0364:
1770         case 0365:
1771             break;
1772
1773         case 0366:
1774         case 0367:
1775             *bytes = c - 0366 + 0x66;
1776             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1777             offset += 1;
1778             break;
1779
1780         case 0370:
1781         case 0371:
1782         case 0372:
1783             break;
1784
1785         case 0373:
1786             *bytes = bits == 16 ? 3 : 5;
1787             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1788             offset += 1;
1789             break;
1790
1791         case4(0100):
1792         case4(0110):
1793         case4(0120):
1794         case4(0130):
1795         case4(0200):
1796         case4(0204):
1797         case4(0210):
1798         case4(0214):
1799         case4(0220):
1800         case4(0224):
1801         case4(0230):
1802         case4(0234):
1803             {
1804                 ea ea_data;
1805                 int rfield;
1806                 int32_t rflags;
1807                 uint8_t *p;
1808                 int32_t s;
1809                 enum out_type type;
1810                 struct operand *opy = &ins->oprs[op2];
1811
1812                 if (c <= 0177) {
1813                     /* pick rfield from operand b (opx) */
1814                     rflags = regflag(opx);
1815                     rfield = nasm_regvals[opx->basereg];
1816                 } else {
1817                     /* rfield is constant */
1818                     rflags = 0;
1819                     rfield = c & 7;
1820                 }
1821
1822                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1823                                 rfield, rflags)) {
1824                     errfunc(ERR_NONFATAL, "invalid effective address");
1825                 }
1826
1827
1828                 p = bytes;
1829                 *p++ = ea_data.modrm;
1830                 if (ea_data.sib_present)
1831                     *p++ = ea_data.sib;
1832
1833                 /* DREX suffixes come between the SIB and the displacement */
1834                 if (ins->rex & REX_D) {
1835                     *p++ = (ins->drexdst << 4) |
1836                            (ins->rex & REX_OC ? 0x08 : 0) |
1837                            (ins->rex & (REX_R|REX_X|REX_B));
1838                     ins->rex = 0;
1839                 }
1840
1841                 s = p - bytes;
1842                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1843
1844                 /*
1845                  * Make sure the address gets the right offset in case
1846                  * the line breaks in the .lst file (BR 1197827)
1847                  */
1848                 offset += s;
1849                 s = 0;
1850
1851                 switch (ea_data.bytes) {
1852                 case 0:
1853                     break;
1854                 case 1:
1855                 case 2:
1856                 case 4:
1857                 case 8:
1858                     data = opy->offset;
1859                     warn_overflow(ea_data.bytes, opy);
1860                     s += ea_data.bytes;
1861                     if (ea_data.rip) {
1862                         if (opy->segment == segment) {
1863                             data -= insn_end;
1864                             out(offset, segment, &data, OUT_ADDRESS,
1865                                 ea_data.bytes, NO_SEG, NO_SEG);
1866                         } else {
1867                             out(offset, segment, &data, OUT_REL4ADR,
1868                                 insn_end - offset, opy->segment, opy->wrt);
1869                         }
1870                     } else {
1871                         type = OUT_ADDRESS;
1872                         out(offset, segment, &data, OUT_ADDRESS,
1873                             ea_data.bytes, opy->segment, opy->wrt);
1874                     }
1875                     break;
1876                 default:
1877                     /* Impossible! */
1878                     errfunc(ERR_PANIC,
1879                             "Invalid amount of bytes (%d) for offset?!",
1880                             ea_data.bytes);
1881                     break;
1882                 }
1883                 offset += s;
1884             }
1885             break;
1886
1887         default:
1888             errfunc(ERR_PANIC, "internal instruction table corrupt"
1889                     ": instruction code \\%o (0x%02X) given", c, c);
1890             break;
1891         }
1892     }
1893 }
1894
1895 static int32_t regflag(const operand * o)
1896 {
1897     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1898         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1899     }
1900     return nasm_reg_flags[o->basereg];
1901 }
1902
1903 static int32_t regval(const operand * o)
1904 {
1905     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1906         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1907     }
1908     return nasm_regvals[o->basereg];
1909 }
1910
1911 static int op_rexflags(const operand * o, int mask)
1912 {
1913     int32_t flags;
1914     int val;
1915
1916     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1917         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1918     }
1919
1920     flags = nasm_reg_flags[o->basereg];
1921     val = nasm_regvals[o->basereg];
1922
1923     return rexflags(val, flags, mask);
1924 }
1925
1926 static int rexflags(int val, int32_t flags, int mask)
1927 {
1928     int rex = 0;
1929
1930     if (val >= 8)
1931         rex |= REX_B|REX_X|REX_R;
1932     if (flags & BITS64)
1933         rex |= REX_W;
1934     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1935         rex |= REX_H;
1936     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1937         rex |= REX_P;
1938
1939     return rex & mask;
1940 }
1941
1942 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1943 {
1944     int i, size[MAX_OPERANDS], asize, oprs, ret;
1945
1946     ret = 100;
1947
1948     /*
1949      * Check the opcode
1950      */
1951     if (itemp->opcode != instruction->opcode)
1952         return 0;
1953
1954     /*
1955      * Count the operands
1956      */
1957     if (itemp->operands != instruction->operands)
1958         return 0;
1959
1960     /*
1961      * Check that no spurious colons or TOs are present
1962      */
1963     for (i = 0; i < itemp->operands; i++)
1964         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1965             return 0;
1966
1967     /*
1968      * Process size flags
1969      */
1970     if (itemp->flags & IF_ARMASK) {
1971         memset(size, 0, sizeof size);
1972
1973         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
1974
1975         switch (itemp->flags & IF_SMASK) {
1976         case IF_SB:
1977             size[i] = BITS8;
1978             break;
1979         case IF_SW:
1980             size[i] = BITS16;
1981             break;
1982         case IF_SD:
1983             size[i] = BITS32;
1984             break;
1985         case IF_SQ:
1986             size[i] = BITS64;
1987             break;
1988         case IF_SO:
1989             size[i] = BITS128;
1990             break;
1991         case IF_SY:
1992             size[i] = BITS256;
1993             break;
1994         case IF_SZ:
1995             switch (bits) {
1996             case 16:
1997                 size[i] = BITS16;
1998                 break;
1999             case 32:
2000                 size[i] = BITS32;
2001                 break;
2002             case 64:
2003                 size[i] = BITS64;
2004                 break;
2005             }
2006             break;
2007         default:
2008             break;
2009         }
2010     } else {
2011         asize = 0;
2012         switch (itemp->flags & IF_SMASK) {
2013         case IF_SB:
2014             asize = BITS8;
2015             break;
2016         case IF_SW:
2017             asize = BITS16;
2018             break;
2019         case IF_SD:
2020             asize = BITS32;
2021             break;
2022         case IF_SQ:
2023             asize = BITS64;
2024             break;
2025         case IF_SO:
2026             asize = BITS128;
2027             break;
2028         case IF_SY:
2029             asize = BITS256;
2030             break;
2031         case IF_SZ:
2032             switch (bits) {
2033             case 16:
2034                 asize = BITS16;
2035                 break;
2036             case 32:
2037                 asize = BITS32;
2038                 break;
2039             case 64:
2040                 asize = BITS64;
2041                 break;
2042             }
2043             break;
2044         default:
2045             break;
2046         }
2047         for (i = 0; i < MAX_OPERANDS; i++)
2048             size[i] = asize;
2049     }
2050
2051     /*
2052      * Check that the operand flags all match up
2053      */
2054     for (i = 0; i < itemp->operands; i++) {
2055         int32_t type = instruction->oprs[i].type;
2056         if (!(type & SIZE_MASK))
2057             type |= size[i];
2058
2059         if (itemp->opd[i] & SAME_AS) {
2060             int j = itemp->opd[i] & ~SAME_AS;
2061             if (type != instruction->oprs[j].type ||
2062                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2063                 return 0;
2064         } else if (itemp->opd[i] & ~type ||
2065             ((itemp->opd[i] & SIZE_MASK) &&
2066              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2067             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2068                 (type & SIZE_MASK))
2069                 return 0;
2070             else
2071                 return 1;
2072         }
2073     }
2074
2075     /*
2076      * Check operand sizes
2077      */
2078     if (itemp->flags & (IF_SM | IF_SM2)) {
2079         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2080         asize = 0;
2081         for (i = 0; i < oprs; i++) {
2082             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2083                 int j;
2084                 for (j = 0; j < oprs; j++)
2085                     size[j] = asize;
2086                 break;
2087             }
2088         }
2089     } else {
2090         oprs = itemp->operands;
2091     }
2092
2093     for (i = 0; i < itemp->operands; i++) {
2094         if (!(itemp->opd[i] & SIZE_MASK) &&
2095             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2096             return 2;
2097     }
2098
2099     /*
2100      * Check template is okay at the set cpu level
2101      */
2102     if (((itemp->flags & IF_PLEVEL) > cpu))
2103         return 3;
2104
2105     /*
2106      * Verify the appropriate long mode flag.
2107      */
2108     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2109         return 4;
2110
2111     /*
2112      * Check if special handling needed for Jumps
2113      */
2114     if ((uint8_t)(itemp->code[0]) >= 0370)
2115         return 99;
2116
2117     return ret;
2118 }
2119
2120 static ea *process_ea(operand * input, ea * output, int bits,
2121                       int addrbits, int rfield, int32_t rflags)
2122 {
2123     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2124
2125     output->rip = false;
2126
2127     /* REX flags for the rfield operand */
2128     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2129
2130     if (!(REGISTER & ~input->type)) {   /* register direct */
2131         int i;
2132         int32_t f;
2133
2134         if (input->basereg < EXPR_REG_START /* Verify as Register */
2135             || input->basereg >= REG_ENUM_LIMIT)
2136             return NULL;
2137         f = regflag(input);
2138         i = nasm_regvals[input->basereg];
2139
2140         if (REG_EA & ~f)
2141             return NULL;        /* Invalid EA register */
2142
2143         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2144
2145         output->sib_present = false;             /* no SIB necessary */
2146         output->bytes = 0;  /* no offset necessary either */
2147         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2148     } else {                    /* it's a memory reference */
2149         if (input->basereg == -1
2150             && (input->indexreg == -1 || input->scale == 0)) {
2151             /* it's a pure offset */
2152             if (bits == 64 && (~input->type & IP_REL)) {
2153               int scale, index, base;
2154               output->sib_present = true;
2155               scale = 0;
2156               index = 4;
2157               base = 5;
2158               output->sib = (scale << 6) | (index << 3) | base;
2159               output->bytes = 4;
2160               output->modrm = 4 | ((rfield & 7) << 3);
2161               output->rip = false;
2162             } else {
2163               output->sib_present = false;
2164               output->bytes = (addrbits != 16 ? 4 : 2);
2165               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2166               output->rip = bits == 64;
2167             }
2168         } else {                /* it's an indirection */
2169             int i = input->indexreg, b = input->basereg, s = input->scale;
2170             int32_t o = input->offset, seg = input->segment;
2171             int hb = input->hintbase, ht = input->hinttype;
2172             int t;
2173             int it, bt;
2174             int32_t ix, bx;     /* register flags */
2175
2176             if (s == 0)
2177                 i = -1;         /* make this easy, at least */
2178
2179             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2180                 it = nasm_regvals[i];
2181                 ix = nasm_reg_flags[i];
2182             } else {
2183                 it = -1;
2184                 ix = 0;
2185             }
2186
2187             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2188                 bt = nasm_regvals[b];
2189                 bx = nasm_reg_flags[b];
2190             } else {
2191                 bt = -1;
2192                 bx = 0;
2193             }
2194
2195             /* check for a 32/64-bit memory reference... */
2196             if ((ix|bx) & (BITS32|BITS64)) {
2197                 /* it must be a 32/64-bit memory reference. Firstly we have
2198                  * to check that all registers involved are type E/Rxx. */
2199                 int32_t sok = BITS32|BITS64;
2200
2201                 if (it != -1) {
2202                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2203                         sok &= ix;
2204                     else
2205                         return NULL;
2206                 }
2207
2208                 if (bt != -1) {
2209                     if (REG_GPR & ~bx)
2210                         return NULL; /* Invalid register */
2211                     if (~sok & bx & SIZE_MASK)
2212                         return NULL; /* Invalid size */
2213                     sok &= bx;
2214                 }
2215
2216                 /* While we're here, ensure the user didn't specify
2217                    WORD or QWORD. */
2218                 if (input->disp_size == 16 || input->disp_size == 64)
2219                     return NULL;
2220
2221                 if (addrbits == 16 ||
2222                     (addrbits == 32 && !(sok & BITS32)) ||
2223                     (addrbits == 64 && !(sok & BITS64)))
2224                     return NULL;
2225
2226                 /* now reorganize base/index */
2227                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2228                     ((hb == b && ht == EAH_NOTBASE)
2229                      || (hb == i && ht == EAH_MAKEBASE))) {
2230                     /* swap if hints say so */
2231                     t = bt, bt = it, it = t;
2232                     t = bx, bx = ix, ix = t;
2233                 }
2234                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2235                     bt = -1, bx = 0, s++;
2236                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2237                     /* make single reg base, unless hint */
2238                     bt = it, bx = ix, it = -1, ix = 0;
2239                 }
2240                 if (((s == 2 && it != REG_NUM_ESP
2241                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2242                      || s == 5 || s == 9) && bt == -1)
2243                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2244                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2245                     && (input->eaflags & EAF_TIMESTWO))
2246                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2247                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2248                 if (s == 1 && it == REG_NUM_ESP) {
2249                     /* swap ESP into base if scale is 1 */
2250                     t = it, it = bt, bt = t;
2251                     t = ix, ix = bx, bx = t;
2252                 }
2253                 if (it == REG_NUM_ESP
2254                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2255                     return NULL;        /* wrong, for various reasons */
2256
2257                 output->rex |= rexflags(it, ix, REX_X);
2258                 output->rex |= rexflags(bt, bx, REX_B);
2259
2260                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2261                     /* no SIB needed */
2262                     int mod, rm;
2263
2264                     if (bt == -1) {
2265                         rm = 5;
2266                         mod = 0;
2267                     } else {
2268                         rm = (bt & 7);
2269                         if (rm != REG_NUM_EBP && o == 0 &&
2270                                 seg == NO_SEG && !forw_ref &&
2271                                 !(input->eaflags &
2272                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2273                             mod = 0;
2274                         else if (input->eaflags & EAF_BYTEOFFS ||
2275                                  (o >= -128 && o <= 127 && seg == NO_SEG
2276                                   && !forw_ref
2277                                   && !(input->eaflags & EAF_WORDOFFS)))
2278                             mod = 1;
2279                         else
2280                             mod = 2;
2281                     }
2282
2283                     output->sib_present = false;
2284                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2285                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2286                 } else {
2287                     /* we need a SIB */
2288                     int mod, scale, index, base;
2289
2290                     if (it == -1)
2291                         index = 4, s = 1;
2292                     else
2293                         index = (it & 7);
2294
2295                     switch (s) {
2296                     case 1:
2297                         scale = 0;
2298                         break;
2299                     case 2:
2300                         scale = 1;
2301                         break;
2302                     case 4:
2303                         scale = 2;
2304                         break;
2305                     case 8:
2306                         scale = 3;
2307                         break;
2308                     default:   /* then what the smeg is it? */
2309                         return NULL;    /* panic */
2310                     }
2311
2312                     if (bt == -1) {
2313                         base = 5;
2314                         mod = 0;
2315                     } else {
2316                         base = (bt & 7);
2317                         if (base != REG_NUM_EBP && o == 0 &&
2318                                     seg == NO_SEG && !forw_ref &&
2319                                     !(input->eaflags &
2320                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2321                             mod = 0;
2322                         else if (input->eaflags & EAF_BYTEOFFS ||
2323                                  (o >= -128 && o <= 127 && seg == NO_SEG
2324                                   && !forw_ref
2325                                   && !(input->eaflags & EAF_WORDOFFS)))
2326                             mod = 1;
2327                         else
2328                             mod = 2;
2329                     }
2330
2331                     output->sib_present = true;
2332                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2333                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2334                     output->sib = (scale << 6) | (index << 3) | base;
2335                 }
2336             } else {            /* it's 16-bit */
2337                 int mod, rm;
2338
2339                 /* check for 64-bit long mode */
2340                 if (addrbits == 64)
2341                     return NULL;
2342
2343                 /* check all registers are BX, BP, SI or DI */
2344                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2345                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2346                                        && i != R_SI && i != R_DI))
2347                     return NULL;
2348
2349                 /* ensure the user didn't specify DWORD/QWORD */
2350                 if (input->disp_size == 32 || input->disp_size == 64)
2351                     return NULL;
2352
2353                 if (s != 1 && i != -1)
2354                     return NULL;        /* no can do, in 16-bit EA */
2355                 if (b == -1 && i != -1) {
2356                     int tmp = b;
2357                     b = i;
2358                     i = tmp;
2359                 }               /* swap */
2360                 if ((b == R_SI || b == R_DI) && i != -1) {
2361                     int tmp = b;
2362                     b = i;
2363                     i = tmp;
2364                 }
2365                 /* have BX/BP as base, SI/DI index */
2366                 if (b == i)
2367                     return NULL;        /* shouldn't ever happen, in theory */
2368                 if (i != -1 && b != -1 &&
2369                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2370                     return NULL;        /* invalid combinations */
2371                 if (b == -1)    /* pure offset: handled above */
2372                     return NULL;        /* so if it gets to here, panic! */
2373
2374                 rm = -1;
2375                 if (i != -1)
2376                     switch (i * 256 + b) {
2377                     case R_SI * 256 + R_BX:
2378                         rm = 0;
2379                         break;
2380                     case R_DI * 256 + R_BX:
2381                         rm = 1;
2382                         break;
2383                     case R_SI * 256 + R_BP:
2384                         rm = 2;
2385                         break;
2386                     case R_DI * 256 + R_BP:
2387                         rm = 3;
2388                         break;
2389                 } else
2390                     switch (b) {
2391                     case R_SI:
2392                         rm = 4;
2393                         break;
2394                     case R_DI:
2395                         rm = 5;
2396                         break;
2397                     case R_BP:
2398                         rm = 6;
2399                         break;
2400                     case R_BX:
2401                         rm = 7;
2402                         break;
2403                     }
2404                 if (rm == -1)   /* can't happen, in theory */
2405                     return NULL;        /* so panic if it does */
2406
2407                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2408                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2409                     mod = 0;
2410                 else if (input->eaflags & EAF_BYTEOFFS ||
2411                          (o >= -128 && o <= 127 && seg == NO_SEG
2412                           && !forw_ref
2413                           && !(input->eaflags & EAF_WORDOFFS)))
2414                     mod = 1;
2415                 else
2416                     mod = 2;
2417
2418                 output->sib_present = false;    /* no SIB - it's 16-bit */
2419                 output->bytes = mod;    /* bytes of offset needed */
2420                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2421             }
2422         }
2423     }
2424
2425     output->size = 1 + output->sib_present + output->bytes;
2426     return output;
2427 }
2428
2429 static void add_asp(insn *ins, int addrbits)
2430 {
2431     int j, valid;
2432     int defdisp;
2433
2434     valid = (addrbits == 64) ? 64|32 : 32|16;
2435
2436     switch (ins->prefixes[PPS_ASIZE]) {
2437     case P_A16:
2438         valid &= 16;
2439         break;
2440     case P_A32:
2441         valid &= 32;
2442         break;
2443     case P_A64:
2444         valid &= 64;
2445         break;
2446     case P_ASP:
2447         valid &= (addrbits == 32) ? 16 : 32;
2448         break;
2449     default:
2450         break;
2451     }
2452
2453     for (j = 0; j < ins->operands; j++) {
2454         if (!(MEMORY & ~ins->oprs[j].type)) {
2455             int32_t i, b;
2456
2457             /* Verify as Register */
2458             if (ins->oprs[j].indexreg < EXPR_REG_START
2459                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2460                 i = 0;
2461             else
2462                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2463
2464             /* Verify as Register */
2465             if (ins->oprs[j].basereg < EXPR_REG_START
2466                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2467                 b = 0;
2468             else
2469                 b = nasm_reg_flags[ins->oprs[j].basereg];
2470
2471             if (ins->oprs[j].scale == 0)
2472                 i = 0;
2473
2474             if (!i && !b) {
2475                 int ds = ins->oprs[j].disp_size;
2476                 if ((addrbits != 64 && ds > 8) ||
2477                     (addrbits == 64 && ds == 16))
2478                     valid &= ds;
2479             } else {
2480                 if (!(REG16 & ~b))
2481                     valid &= 16;
2482                 if (!(REG32 & ~b))
2483                     valid &= 32;
2484                 if (!(REG64 & ~b))
2485                     valid &= 64;
2486
2487                 if (!(REG16 & ~i))
2488                     valid &= 16;
2489                 if (!(REG32 & ~i))
2490                     valid &= 32;
2491                 if (!(REG64 & ~i))
2492                     valid &= 64;
2493             }
2494         }
2495     }
2496
2497     if (valid & addrbits) {
2498         ins->addr_size = addrbits;
2499     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2500         /* Add an address size prefix */
2501         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2502         ins->prefixes[PPS_ASIZE] = pref;
2503         ins->addr_size = (addrbits == 32) ? 16 : 32;
2504     } else {
2505         /* Impossible... */
2506         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2507         ins->addr_size = addrbits; /* Error recovery */
2508     }
2509
2510     defdisp = ins->addr_size == 16 ? 16 : 32;
2511
2512     for (j = 0; j < ins->operands; j++) {
2513         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2514             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2515             != ins->addr_size) {
2516             /* mem_offs sizes must match the address size; if not,
2517                strip the MEM_OFFS bit and match only EA instructions */
2518             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2519         }
2520     }
2521 }