assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2010 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 0ww lpp
  96  *                 [w0] ww = 0 for W = 0
  97  *                 [w1] ww = 1 for W = 1
  98  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  99  *                 [ww] ww = 3 for W used as REX.W
 100  *
 101  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 102  *
 103  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 104  *                 which is to be extended to the operand size.
 105  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 106  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 107  * \312          - (disassembler only) invalid with non-default address size.
 108  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 109  * \314          - (disassembler only) invalid with REX.B
 110  * \315          - (disassembler only) invalid with REX.X
 111  * \316          - (disassembler only) invalid with REX.R
 112  * \317          - (disassembler only) invalid with REX.W
 113  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 114  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 115  * \322          - indicates that this instruction is only valid when the
 116  *                 operand size is the default (instruction to disassembler,
 117  *                 generates no code in the assembler)
 118  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 119  * \324          - indicates 64-bit operand size requiring REX prefix.
 120  * \325          - instruction which always uses spl/bpl/sil/dil
 121  * \330          - a literal byte follows in the code stream, to be added
 122  *                 to the condition code value of the instruction.
 123  * \331          - instruction not valid with REP prefix.  Hint for
 124  *                 disassembler only; for SSE instructions.
 125  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 126  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 127  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 128  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
 129  * \336          - force a REP(E) prefix (0xF3) even if not specified.
 130  * \337          - force a REPNE prefix (0xF2) even if not specified.
 131  *                 \336-\337 are still listed as prefixes in the disassembler.
 132  * \340          - reserve <operand 0> bytes of uninitialized storage.
 133  *                 Operand 0 had better be a segmentless constant.
 134  * \341          - this instruction needs a WAIT "prefix"
 135  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 136  *                 (POP is never used for CS) depending on operand 0
 137  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 138  *                 on operand 0
 139  * \360          - no SSE prefix (== \364\331)
 140  * \361          - 66 SSE prefix (== \366\331)
 141  * \362          - F2 SSE prefix (== \364\332)
 142  * \363          - F3 SSE prefix (== \364\333)
 143  * \364          - operand-size prefix (0x66) not permitted
 144  * \365          - address-size prefix (0x67) not permitted
 145  * \366          - operand-size prefix (0x66) used as opcode extension
 146  * \367          - address-size prefix (0x67) used as opcode extension
 147  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 148  *                 370 is used for Jcc, 371 is used for JMP.
 149  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 150  *                 used for conditional jump over longer jump
 151  */
 152
 153 #include "compiler.h"
 154
 155 #include <stdio.h>
 156 #include <string.h>
 157 #include <inttypes.h>
 158
 159 #include "nasm.h"
 160 #include "nasmlib.h"
 161 #include "assemble.h"
 162 #include "insns.h"
 163 #include "tables.h"
 164
 165 enum match_result {
 166     /*
 167      * Matching errors.  These should be sorted so that more specific
 168      * errors come later in the sequence.
 169      */
 170     MERR_INVALOP,
 171     MERR_OPSIZEMISSING,
 172     MERR_OPSIZEMISMATCH,
 173     MERR_BADCPU,
 174     MERR_BADMODE,
 175     /*
 176      * Matching success; the conditional ones first
 177      */
 178     MOK_JUMP,                   /* Matching OK but needs jmp_match() */
 179     MOK_GOOD                    /* Matching unconditionally OK */
 180 };
 181
 182 typedef struct {
 183     int sib_present;                 /* is a SIB byte necessary? */
 184     int bytes;                       /* # of bytes of offset needed */
 185     int size;                        /* lazy - this is sib+bytes+1 */
 186     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 187 } ea;
 188
 189 static uint32_t cpu;            /* cpu level received from nasm.c */
 190 static efunc errfunc;
 191 static struct ofmt *outfmt;
 192 static ListGen *list;
 193
 194 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 195 static void gencode(int32_t segment, int64_t offset, int bits,
 196                     insn * ins, const struct itemplate *temp,
 197                     int64_t insn_end);
 198 static enum match_result find_match(const struct itemplate **tempp,
 199                                     insn *instruction,
 200                                     int32_t segment, int64_t offset, int bits);
 201 static enum match_result matches(const struct itemplate *, insn *, int bits);
 202 static opflags_t regflag(const operand *);
 203 static int32_t regval(const operand *);
 204 static int rexflags(int, opflags_t, int);
 205 static int op_rexflags(const operand *, int);
 206 static ea *process_ea(operand *, ea *, int, int, int, opflags_t);
 207 static void add_asp(insn *, int);
 208
 209 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 210 {
 211     return ins->prefixes[pos] == prefix;
 212 }
 213
 214 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 215 {
 216     if (ins->prefixes[pos])
 217         errfunc(ERR_NONFATAL, "invalid %s prefix",
 218                 prefix_name(ins->prefixes[pos]));
 219 }
 220
 221 static const char *size_name(int size)
 222 {
 223     switch (size) {
 224     case 1:
 225         return "byte";
 226     case 2:
 227         return "word";
 228     case 4:
 229         return "dword";
 230     case 8:
 231         return "qword";
 232     case 10:
 233         return "tword";
 234     case 16:
 235         return "oword";
 236     case 32:
 237         return "yword";
 238     default:
 239         return "???";
 240     }
 241 }
 242
 243 static void warn_overflow(int pass, int size)
 244 {
 245     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 246             "%s data exceeds bounds", size_name(size));
 247 }
 248
 249 static void warn_overflow_const(int64_t data, int size)
 250 {
 251     if (overflow_general(data, size))
 252         warn_overflow(ERR_PASS1, size);
 253 }
 254
 255 static void warn_overflow_opd(const struct operand *o, int size)
 256 {
 257     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 258         if (overflow_general(o->offset, size))
 259             warn_overflow(ERR_PASS2, size);
 260     }
 261 }
 262
 263 /*
 264  * This routine wrappers the real output format's output routine,
 265  * in order to pass a copy of the data off to the listing file
 266  * generator at the same time.
 267  */
 268 static void out(int64_t offset, int32_t segto, const void *data,
 269                 enum out_type type, uint64_t size,
 270                 int32_t segment, int32_t wrt)
 271 {
 272     static int32_t lineno = 0;     /* static!!! */
 273     static char *lnfname = NULL;
 274     uint8_t p[8];
 275
 276     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 277         /*
 278          * This is a non-relocated address, and we're going to
 279          * convert it into RAWDATA format.
 280          */
 281         uint8_t *q = p;
 282
 283         if (size > 8) {
 284             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 285             return;
 286         }
 287
 288         WRITEADDR(q, *(int64_t *)data, size);
 289         data = p;
 290         type = OUT_RAWDATA;
 291     }
 292
 293     list->output(offset, data, type, size);
 294
 295     /*
 296      * this call to src_get determines when we call the
 297      * debug-format-specific "linenum" function
 298      * it updates lineno and lnfname to the current values
 299      * returning 0 if "same as last time", -2 if lnfname
 300      * changed, and the amount by which lineno changed,
 301      * if it did. thus, these variables must be static
 302      */
 303
 304     if (src_get(&lineno, &lnfname)) {
 305         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 306     }
 307
 308     outfmt->output(segto, data, type, size, segment, wrt);
 309 }
 310
 311 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 312                      insn * ins, const uint8_t *code)
 313 {
 314     int64_t isize;
 315     uint8_t c = code[0];
 316
 317     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 318         return false;
 319     if (!optimizing)
 320         return false;
 321     if (optimizing < 0 && c == 0371)
 322         return false;
 323
 324     isize = calcsize(segment, offset, bits, ins, code);
 325
 326     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 327         /* Be optimistic in pass 1 */
 328         return true;
 329
 330     if (ins->oprs[0].segment != segment)
 331         return false;
 332
 333     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 334     return (isize >= -128 && isize <= 127); /* is it byte size? */
 335 }
 336
 337 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 338                  insn * instruction, struct ofmt *output, efunc error,
 339                  ListGen * listgen)
 340 {
 341     const struct itemplate *temp;
 342     int j;
 343     enum match_result m;
 344     int64_t insn_end;
 345     int32_t itimes;
 346     int64_t start = offset;
 347     int64_t wsize;              /* size for DB etc. */
 348
 349     errfunc = error;            /* to pass to other functions */
 350     cpu = cp;
 351     outfmt = output;            /* likewise */
 352     list = listgen;             /* and again */
 353
 354     wsize = idata_bytes(instruction->opcode);
 355     if (wsize == -1)
 356         return 0;
 357
 358     if (wsize) {
 359         extop *e;
 360         int32_t t = instruction->times;
 361         if (t < 0)
 362             errfunc(ERR_PANIC,
 363                     "instruction->times < 0 (%ld) in assemble()", t);
 364
 365         while (t--) {           /* repeat TIMES times */
 366             list_for_each(e, instruction->eops) {
 367                 if (e->type == EOT_DB_NUMBER) {
 368                     if (wsize > 8) {
 369                         errfunc(ERR_NONFATAL,
 370                                 "integer supplied to a DT, DO or DY"
 371                                 " instruction");
 372                     } else {
 373                         out(offset, segment, &e->offset,
 374                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 375                         offset += wsize;
 376                     }
 377                 } else if (e->type == EOT_DB_STRING ||
 378                            e->type == EOT_DB_STRING_FREE) {
 379                     int align;
 380
 381                     out(offset, segment, e->stringval,
 382                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 383                     align = e->stringlen % wsize;
 384
 385                     if (align) {
 386                         align = wsize - align;
 387                         out(offset, segment, zero_buffer,
 388                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 389                     }
 390                     offset += e->stringlen + align;
 391                 }
 392             }
 393             if (t > 0 && t == instruction->times - 1) {
 394                 /*
 395                  * Dummy call to list->output to give the offset to the
 396                  * listing module.
 397                  */
 398                 list->output(offset, NULL, OUT_RAWDATA, 0);
 399                 list->uplevel(LIST_TIMES);
 400             }
 401         }
 402         if (instruction->times > 1)
 403             list->downlevel(LIST_TIMES);
 404         return offset - start;
 405     }
 406
 407     if (instruction->opcode == I_INCBIN) {
 408         const char *fname = instruction->eops->stringval;
 409         FILE *fp;
 410
 411         fp = fopen(fname, "rb");
 412         if (!fp) {
 413             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 414                   fname);
 415         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 416             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 417                   fname);
 418         } else {
 419             static char buf[4096];
 420             size_t t = instruction->times;
 421             size_t base = 0;
 422             size_t len;
 423
 424             len = ftell(fp);
 425             if (instruction->eops->next) {
 426                 base = instruction->eops->next->offset;
 427                 len -= base;
 428                 if (instruction->eops->next->next &&
 429                     len > (size_t)instruction->eops->next->next->offset)
 430                     len = (size_t)instruction->eops->next->next->offset;
 431             }
 432             /*
 433              * Dummy call to list->output to give the offset to the
 434              * listing module.
 435              */
 436             list->output(offset, NULL, OUT_RAWDATA, 0);
 437             list->uplevel(LIST_INCBIN);
 438             while (t--) {
 439                 size_t l;
 440
 441                 fseek(fp, base, SEEK_SET);
 442                 l = len;
 443                 while (l > 0) {
 444                     int32_t m;
 445                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 446                     if (!m) {
 447                         /*
 448                          * This shouldn't happen unless the file
 449                          * actually changes while we are reading
 450                          * it.
 451                          */
 452                         error(ERR_NONFATAL,
 453                               "`incbin': unexpected EOF while"
 454                               " reading file `%s'", fname);
 455                         t = 0;  /* Try to exit cleanly */
 456                         break;
 457                     }
 458                     out(offset, segment, buf, OUT_RAWDATA, m,
 459                         NO_SEG, NO_SEG);
 460                     l -= m;
 461                 }
 462             }
 463             list->downlevel(LIST_INCBIN);
 464             if (instruction->times > 1) {
 465                 /*
 466                  * Dummy call to list->output to give the offset to the
 467                  * listing module.
 468                  */
 469                 list->output(offset, NULL, OUT_RAWDATA, 0);
 470                 list->uplevel(LIST_TIMES);
 471                 list->downlevel(LIST_TIMES);
 472             }
 473             fclose(fp);
 474             return instruction->times * len;
 475         }
 476         return 0;               /* if we're here, there's an error */
 477     }
 478
 479     /* Check to see if we need an address-size prefix */
 480     add_asp(instruction, bits);
 481
 482     m = find_match(&temp, instruction, segment, offset, bits);
 483
 484     if (m == MOK_GOOD) {
 485         /* Matches! */
 486         int64_t insn_size = calcsize(segment, offset, bits,
 487                                      instruction, temp->code);
 488         itimes = instruction->times;
 489         if (insn_size < 0)  /* shouldn't be, on pass two */
 490             error(ERR_PANIC, "errors made it through from pass one");
 491         else
 492             while (itimes--) {
 493                 for (j = 0; j < MAXPREFIX; j++) {
 494                     uint8_t c = 0;
 495                     switch (instruction->prefixes[j]) {
 496                     case P_WAIT:
 497                         c = 0x9B;
 498                         break;
 499                     case P_LOCK:
 500                         c = 0xF0;
 501                         break;
 502                     case P_REPNE:
 503                     case P_REPNZ:
 504                         c = 0xF2;
 505                         break;
 506                     case P_REPE:
 507                     case P_REPZ:
 508                     case P_REP:
 509                         c = 0xF3;
 510                         break;
 511                     case R_CS:
 512                         if (bits == 64) {
 513                             error(ERR_WARNING | ERR_PASS2,
 514                                   "cs segment base generated, but will be ignored in 64-bit mode");
 515                         }
 516                         c = 0x2E;
 517                         break;
 518                     case R_DS:
 519                         if (bits == 64) {
 520                             error(ERR_WARNING | ERR_PASS2,
 521                                   "ds segment base generated, but will be ignored in 64-bit mode");
 522                         }
 523                         c = 0x3E;
 524                         break;
 525                     case R_ES:
 526                         if (bits == 64) {
 527                             error(ERR_WARNING | ERR_PASS2,
 528                                   "es segment base generated, but will be ignored in 64-bit mode");
 529                         }
 530                         c = 0x26;
 531                         break;
 532                     case R_FS:
 533                         c = 0x64;
 534                         break;
 535                     case R_GS:
 536                         c = 0x65;
 537                         break;
 538                     case R_SS:
 539                         if (bits == 64) {
 540                             error(ERR_WARNING | ERR_PASS2,
 541                                   "ss segment base generated, but will be ignored in 64-bit mode");
 542                         }
 543                         c = 0x36;
 544                         break;
 545                     case R_SEGR6:
 546                     case R_SEGR7:
 547                         error(ERR_NONFATAL,
 548                               "segr6 and segr7 cannot be used as prefixes");
 549                         break;
 550                     case P_A16:
 551                         if (bits == 64) {
 552                             error(ERR_NONFATAL,
 553                                   "16-bit addressing is not supported "
 554                                   "in 64-bit mode");
 555                         } else if (bits != 16)
 556                             c = 0x67;
 557                         break;
 558                     case P_A32:
 559                         if (bits != 32)
 560                             c = 0x67;
 561                         break;
 562                     case P_A64:
 563                         if (bits != 64) {
 564                             error(ERR_NONFATAL,
 565                                   "64-bit addressing is only supported "
 566                                   "in 64-bit mode");
 567                         }
 568                         break;
 569                     case P_ASP:
 570                         c = 0x67;
 571                         break;
 572                     case P_O16:
 573                         if (bits != 16)
 574                             c = 0x66;
 575                         break;
 576                     case P_O32:
 577                         if (bits == 16)
 578                             c = 0x66;
 579                         break;
 580                     case P_O64:
 581                         /* REX.W */
 582                         break;
 583                     case P_OSP:
 584                         c = 0x66;
 585                         break;
 586                     case P_none:
 587                         break;
 588                     default:
 589                         error(ERR_PANIC, "invalid instruction prefix");
 590                     }
 591                     if (c != 0) {
 592                         out(offset, segment, &c, OUT_RAWDATA, 1,
 593                             NO_SEG, NO_SEG);
 594                         offset++;
 595                     }
 596                 }
 597                 insn_end = offset + insn_size;
 598                 gencode(segment, offset, bits, instruction,
 599                         temp, insn_end);
 600                 offset += insn_size;
 601                 if (itimes > 0 && itimes == instruction->times - 1) {
 602                     /*
 603                      * Dummy call to list->output to give the offset to the
 604                      * listing module.
 605                      */
 606                     list->output(offset, NULL, OUT_RAWDATA, 0);
 607                     list->uplevel(LIST_TIMES);
 608                 }
 609             }
 610         if (instruction->times > 1)
 611             list->downlevel(LIST_TIMES);
 612         return offset - start;
 613     } else {
 614         /* No match */
 615         switch (m) {
 616         case MERR_OPSIZEMISSING:
 617             error(ERR_NONFATAL, "operation size not specified");
 618             break;
 619         case MERR_OPSIZEMISMATCH:
 620             error(ERR_NONFATAL, "mismatch in operand sizes");
 621             break;
 622         case MERR_BADCPU:
 623             error(ERR_NONFATAL, "no instruction for this cpu level");
 624             break;
 625         case MERR_BADMODE:
 626             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 627                   bits);
 628             break;
 629         default:
 630             error(ERR_NONFATAL,
 631                   "invalid combination of opcode and operands");
 632             break;
 633         }
 634     }
 635     return 0;
 636 }
 637
 638 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 639                   insn * instruction, efunc error)
 640 {
 641     const struct itemplate *temp;
 642     enum match_result m;
 643
 644     errfunc = error;            /* to pass to other functions */
 645     cpu = cp;
 646
 647     if (instruction->opcode == I_none)
 648         return 0;
 649
 650     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 651         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 652         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 653         instruction->opcode == I_DY) {
 654         extop *e;
 655         int32_t isize, osize, wsize;
 656
 657         isize = 0;
 658         wsize = idata_bytes(instruction->opcode);
 659
 660         list_for_each(e, instruction->eops) {
 661             int32_t align;
 662
 663             osize = 0;
 664             if (e->type == EOT_DB_NUMBER) {
 665                 osize = 1;
 666                 warn_overflow_const(e->offset, wsize);
 667             } else if (e->type == EOT_DB_STRING ||
 668                        e->type == EOT_DB_STRING_FREE)
 669                 osize = e->stringlen;
 670
 671             align = (-osize) % wsize;
 672             if (align < 0)
 673                 align += wsize;
 674             isize += osize + align;
 675         }
 676         return isize * instruction->times;
 677     }
 678
 679     if (instruction->opcode == I_INCBIN) {
 680         const char *fname = instruction->eops->stringval;
 681         FILE *fp;
 682         int64_t val = 0;
 683         size_t len;
 684
 685         fp = fopen(fname, "rb");
 686         if (!fp)
 687             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 688                   fname);
 689         else if (fseek(fp, 0L, SEEK_END) < 0)
 690             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 691                   fname);
 692         else {
 693             len = ftell(fp);
 694             if (instruction->eops->next) {
 695                 len -= instruction->eops->next->offset;
 696                 if (instruction->eops->next->next &&
 697                     len > (size_t)instruction->eops->next->next->offset) {
 698                     len = (size_t)instruction->eops->next->next->offset;
 699                 }
 700             }
 701             val = instruction->times * len;
 702         }
 703         if (fp)
 704             fclose(fp);
 705         return val;
 706     }
 707
 708     /* Check to see if we need an address-size prefix */
 709     add_asp(instruction, bits);
 710
 711     m = find_match(&temp, instruction, segment, offset, bits);
 712     if (m == MOK_GOOD) {
 713         /* we've matched an instruction. */
 714         int64_t isize;
 715         const uint8_t *codes = temp->code;
 716         int j;
 717
 718         isize = calcsize(segment, offset, bits, instruction, codes);
 719         if (isize < 0)
 720             return -1;
 721         for (j = 0; j < MAXPREFIX; j++) {
 722             switch (instruction->prefixes[j]) {
 723             case P_A16:
 724                 if (bits != 16)
 725                     isize++;
 726                 break;
 727             case P_A32:
 728                 if (bits != 32)
 729                     isize++;
 730                 break;
 731             case P_O16:
 732                 if (bits != 16)
 733                     isize++;
 734                 break;
 735             case P_O32:
 736                 if (bits == 16)
 737                     isize++;
 738                 break;
 739             case P_A64:
 740             case P_O64:
 741             case P_none:
 742                 break;
 743             default:
 744                 isize++;
 745                 break;
 746             }
 747         }
 748         return isize * instruction->times;
 749     } else {
 750         return -1;                  /* didn't match any instruction */
 751     }
 752 }
 753
 754 static bool possible_sbyte(operand *o)
 755 {
 756     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 757         !(o->opflags & OPFLAG_UNKNOWN) &&
 758         optimizing >= 0 && !(o->type & STRICT);
 759 }
 760
 761 /* check that opn[op]  is a signed byte of size 16 or 32 */
 762 static bool is_sbyte16(operand *o)
 763 {
 764     int16_t v;
 765
 766     if (!possible_sbyte(o))
 767         return false;
 768
 769     v = o->offset;
 770     return v >= -128 && v <= 127;
 771 }
 772
 773 static bool is_sbyte32(operand *o)
 774 {
 775     int32_t v;
 776
 777     if (!possible_sbyte(o))
 778         return false;
 779
 780     v = o->offset;
 781     return v >= -128 && v <= 127;
 782 }
 783
 784 /* Common construct */
 785 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 786
 787 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 788                         insn * ins, const uint8_t *codes)
 789 {
 790     int64_t length = 0;
 791     uint8_t c;
 792     int rex_mask = ~0;
 793     int op1, op2;
 794     struct operand *opx;
 795     uint8_t opex = 0;
 796
 797     ins->rex = 0;               /* Ensure REX is reset */
 798
 799     if (ins->prefixes[PPS_OSIZE] == P_O64)
 800         ins->rex |= REX_W;
 801
 802     (void)segment;              /* Don't warn that this parameter is unused */
 803     (void)offset;               /* Don't warn that this parameter is unused */
 804
 805     while (*codes) {
 806         c = *codes++;
 807         op1 = (c & 3) + ((opex & 1) << 2);
 808         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 809         opx = &ins->oprs[op1];
 810         opex = 0;               /* For the next iteration */
 811
 812         switch (c) {
 813         case 01:
 814         case 02:
 815         case 03:
 816         case 04:
 817             codes += c, length += c;
 818             break;
 819
 820         case 05:
 821         case 06:
 822         case 07:
 823             opex = c;
 824             break;
 825
 826         case4(010):
 827             ins->rex |=
 828                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 829             codes++, length++;
 830             break;
 831
 832         case4(014):
 833         case4(020):
 834         case4(024):
 835             length++;
 836             break;
 837
 838         case4(030):
 839             length += 2;
 840             break;
 841
 842         case4(034):
 843             if (opx->type & (BITS16 | BITS32 | BITS64))
 844                 length += (opx->type & BITS16) ? 2 : 4;
 845             else
 846                 length += (bits == 16) ? 2 : 4;
 847             break;
 848
 849         case4(040):
 850             length += 4;
 851             break;
 852
 853         case4(044):
 854             length += ins->addr_size >> 3;
 855             break;
 856
 857         case4(050):
 858             length++;
 859             break;
 860
 861         case4(054):
 862             length += 8; /* MOV reg64/imm */
 863             break;
 864
 865         case4(060):
 866             length += 2;
 867             break;
 868
 869         case4(064):
 870             if (opx->type & (BITS16 | BITS32 | BITS64))
 871                 length += (opx->type & BITS16) ? 2 : 4;
 872             else
 873                 length += (bits == 16) ? 2 : 4;
 874             break;
 875
 876         case4(070):
 877             length += 4;
 878             break;
 879
 880         case4(074):
 881             length += 2;
 882             break;
 883
 884         case4(0140):
 885             length += is_sbyte16(opx) ? 1 : 2;
 886             break;
 887
 888         case4(0144):
 889             codes++;
 890             length++;
 891             break;
 892
 893         case4(0150):
 894             length += is_sbyte32(opx) ? 1 : 4;
 895             break;
 896
 897         case4(0154):
 898             codes++;
 899             length++;
 900             break;
 901
 902         case4(0160):
 903             length++;
 904             ins->rex |= REX_D;
 905             ins->drexdst = regval(opx);
 906             break;
 907
 908         case4(0164):
 909             length++;
 910             ins->rex |= REX_D|REX_OC;
 911             ins->drexdst = regval(opx);
 912             break;
 913
 914         case 0171:
 915             break;
 916
 917         case 0172:
 918         case 0173:
 919         case 0174:
 920             codes++;
 921             length++;
 922             break;
 923
 924         case4(0250):
 925             length += is_sbyte32(opx) ? 1 : 4;
 926             break;
 927
 928         case4(0254):
 929             length += 4;
 930             break;
 931
 932         case4(0260):
 933             ins->rex |= REX_V;
 934             ins->drexdst = regval(opx);
 935             ins->vex_cm = *codes++;
 936             ins->vex_wlp = *codes++;
 937             break;
 938
 939         case 0270:
 940             ins->rex |= REX_V;
 941             ins->drexdst = 0;
 942             ins->vex_cm = *codes++;
 943             ins->vex_wlp = *codes++;
 944             break;
 945
 946         case4(0274):
 947             length++;
 948             break;
 949
 950         case4(0300):
 951             break;
 952
 953         case 0310:
 954             if (bits == 64)
 955                 return -1;
 956             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 957             break;
 958
 959         case 0311:
 960             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 961             break;
 962
 963         case 0312:
 964             break;
 965
 966         case 0313:
 967             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 968                 has_prefix(ins, PPS_ASIZE, P_A32))
 969                 return -1;
 970             break;
 971
 972         case4(0314):
 973             break;
 974
 975         case 0320:
 976             length += (bits != 16);
 977             break;
 978
 979         case 0321:
 980             length += (bits == 16);
 981             break;
 982
 983         case 0322:
 984             break;
 985
 986         case 0323:
 987             rex_mask &= ~REX_W;
 988             break;
 989
 990         case 0324:
 991             ins->rex |= REX_W;
 992             break;
 993
 994         case 0325:
 995             ins->rex |= REX_NH;
 996             break;
 997
 998         case 0330:
 999             codes++, length++;
1000             break;
1001
1002         case 0331:
1003             break;
1004
1005         case 0332:
1006         case 0333:
1007             length++;
1008             break;
1009
1010         case 0334:
1011             ins->rex |= REX_L;
1012             break;
1013
1014         case 0335:
1015             break;
1016
1017         case 0336:
1018             if (!ins->prefixes[PPS_LREP])
1019                 ins->prefixes[PPS_LREP] = P_REP;
1020             break;
1021
1022         case 0337:
1023             if (!ins->prefixes[PPS_LREP])
1024                 ins->prefixes[PPS_LREP] = P_REPNE;
1025             break;
1026
1027         case 0340:
1028             if (ins->oprs[0].segment != NO_SEG)
1029                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1030                         " quantity of BSS space");
1031             else
1032                 length += ins->oprs[0].offset;
1033             break;
1034
1035         case 0341:
1036             if (!ins->prefixes[PPS_WAIT])
1037                 ins->prefixes[PPS_WAIT] = P_WAIT;
1038             break;
1039
1040         case4(0344):
1041             length++;
1042             break;
1043
1044         case 0360:
1045             break;
1046
1047         case 0361:
1048         case 0362:
1049         case 0363:
1050             length++;
1051             break;
1052
1053         case 0364:
1054         case 0365:
1055             break;
1056
1057         case 0366:
1058         case 0367:
1059             length++;
1060             break;
1061
1062         case 0370:
1063         case 0371:
1064         case 0372:
1065             break;
1066
1067         case 0373:
1068             length++;
1069             break;
1070
1071         case4(0100):
1072         case4(0110):
1073         case4(0120):
1074         case4(0130):
1075         case4(0200):
1076         case4(0204):
1077         case4(0210):
1078         case4(0214):
1079         case4(0220):
1080         case4(0224):
1081         case4(0230):
1082         case4(0234):
1083             {
1084                 ea ea_data;
1085                 int rfield;
1086                 opflags_t rflags;
1087                 struct operand *opy = &ins->oprs[op2];
1088
1089                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1090
1091                 if (c <= 0177) {
1092                     /* pick rfield from operand b (opx) */
1093                     rflags = regflag(opx);
1094                     rfield = nasm_regvals[opx->basereg];
1095                 } else {
1096                     rflags = 0;
1097                     rfield = c & 7;
1098                 }
1099                 if (!process_ea(opy, &ea_data, bits,
1100                                 ins->addr_size, rfield, rflags)) {
1101                     errfunc(ERR_NONFATAL, "invalid effective address");
1102                     return -1;
1103                 } else {
1104                     ins->rex |= ea_data.rex;
1105                     length += ea_data.size;
1106                 }
1107             }
1108             break;
1109
1110         default:
1111             errfunc(ERR_PANIC, "internal instruction table corrupt"
1112                     ": instruction code \\%o (0x%02X) given", c, c);
1113             break;
1114         }
1115     }
1116
1117     ins->rex &= rex_mask;
1118
1119     if (ins->rex & REX_NH) {
1120         if (ins->rex & REX_H) {
1121             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1122             return -1;
1123         }
1124         ins->rex &= ~REX_P;     /* Don't force REX prefix due to high reg */
1125     }
1126
1127     if (ins->rex & REX_V) {
1128         int bad32 = REX_R|REX_W|REX_X|REX_B;
1129
1130         if (ins->rex & REX_H) {
1131             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1132             return -1;
1133         }
1134         switch (ins->vex_wlp & 030) {
1135         case 000:
1136         case 020:
1137             ins->rex &= ~REX_W;
1138             break;
1139         case 010:
1140             ins->rex |= REX_W;
1141             bad32 &= ~REX_W;
1142             break;
1143         case 030:
1144             /* Follow REX_W */
1145             break;
1146         }
1147
1148         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1149             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1150             return -1;
1151         }
1152         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1153             length += 3;
1154         else
1155             length += 2;
1156     } else if (ins->rex & REX_D) {
1157         if (ins->rex & REX_H) {
1158             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1159             return -1;
1160         }
1161         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1162                            ins->drexdst > 7)) {
1163             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1164             return -1;
1165         }
1166         length++;
1167     } else if (ins->rex & REX_REAL) {
1168         if (ins->rex & REX_H) {
1169             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1170             return -1;
1171         } else if (bits == 64) {
1172             length++;
1173         } else if ((ins->rex & REX_L) &&
1174                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1175                    cpu >= IF_X86_64) {
1176             /* LOCK-as-REX.R */
1177             assert_no_prefix(ins, PPS_LREP);
1178             length++;
1179         } else {
1180             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1181             return -1;
1182         }
1183     }
1184
1185     return length;
1186 }
1187
/*
 * Emit a pending REX prefix byte, if any.
 *
 * Expects `ins', `bits', `offset' and `segment' to be in scope where
 * the macro is used.  A byte is written only when bits == 64, ins->rex
 * has at least one REX_REAL bit set, and neither REX_D nor REX_V is
 * set.  After writing, ins->rex is cleared (so a later EMIT_REX() for
 * the same instruction is a no-op) and `offset' is advanced by one.
 *
 * The body is wrapped in do { } while (0) so that `EMIT_REX();' is a
 * single statement and can be used safely in an unbraced if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) &&                              \
            (ins->rex & REX_REAL) && (bits == 64)) {                    \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1195
1196 static void gencode(int32_t segment, int64_t offset, int bits,
1197                     insn * ins, const struct itemplate *temp,
1198                     int64_t insn_end)
1199 {
1200     static char condval[] = {   /* conditional opcodes */
1201         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1202         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1203         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1204     };
1205     uint8_t c;
1206     uint8_t bytes[4];
1207     int64_t size;
1208     int64_t data;
1209     int op1, op2;
1210     struct operand *opx;
1211     const uint8_t *codes = temp->code;
1212     uint8_t opex = 0;
1213
1214     while (*codes) {
1215         c = *codes++;
1216         op1 = (c & 3) + ((opex & 1) << 2);
1217         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1218         opx = &ins->oprs[op1];
1219         opex = 0;               /* For the next iteration */
1220
1221         switch (c) {
1222         case 01:
1223         case 02:
1224         case 03:
1225         case 04:
1226             EMIT_REX();
1227             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1228             codes += c;
1229             offset += c;
1230             break;
1231
1232         case 05:
1233         case 06:
1234         case 07:
1235             opex = c;
1236             break;
1237
1238         case4(010):
1239             EMIT_REX();
1240             bytes[0] = *codes++ + (regval(opx) & 7);
1241             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1242             offset += 1;
1243             break;
1244
1245         case4(014):
1246             /* The test for BITS8 and SBYTE here is intended to avoid
1247                warning on optimizer actions due to SBYTE, while still
1248                warn on explicit BYTE directives.  Also warn, obviously,
1249                if the optimizer isn't enabled. */
1250             if (((opx->type & BITS8) ||
1251                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1252                 (opx->offset < -128 || opx->offset > 127)) {
1253                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1254                         "signed byte value exceeds bounds");
1255             }
1256             if (opx->segment != NO_SEG) {
1257                 data = opx->offset;
1258                 out(offset, segment, &data, OUT_ADDRESS, 1,
1259                     opx->segment, opx->wrt);
1260             } else {
1261                 bytes[0] = opx->offset;
1262                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1263                     NO_SEG);
1264             }
1265             offset += 1;
1266             break;
1267
1268         case4(020):
1269             if (opx->offset < -256 || opx->offset > 255) {
1270                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1271                         "byte value exceeds bounds");
1272             }
1273             if (opx->segment != NO_SEG) {
1274                 data = opx->offset;
1275                 out(offset, segment, &data, OUT_ADDRESS, 1,
1276                     opx->segment, opx->wrt);
1277             } else {
1278                 bytes[0] = opx->offset;
1279                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1280                     NO_SEG);
1281             }
1282             offset += 1;
1283             break;
1284
1285         case4(024):
1286             if (opx->offset < 0 || opx->offset > 255)
1287                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1288                         "unsigned byte value exceeds bounds");
1289             if (opx->segment != NO_SEG) {
1290                 data = opx->offset;
1291                 out(offset, segment, &data, OUT_ADDRESS, 1,
1292                     opx->segment, opx->wrt);
1293             } else {
1294                 bytes[0] = opx->offset;
1295                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1296                     NO_SEG);
1297             }
1298             offset += 1;
1299             break;
1300
1301         case4(030):
1302             warn_overflow_opd(opx, 2);
1303             data = opx->offset;
1304             out(offset, segment, &data, OUT_ADDRESS, 2,
1305                 opx->segment, opx->wrt);
1306             offset += 2;
1307             break;
1308
1309         case4(034):
1310             if (opx->type & (BITS16 | BITS32))
1311                 size = (opx->type & BITS16) ? 2 : 4;
1312             else
1313                 size = (bits == 16) ? 2 : 4;
1314             warn_overflow_opd(opx, size);
1315             data = opx->offset;
1316             out(offset, segment, &data, OUT_ADDRESS, size,
1317                 opx->segment, opx->wrt);
1318             offset += size;
1319             break;
1320
1321         case4(040):
1322             warn_overflow_opd(opx, 4);
1323             data = opx->offset;
1324             out(offset, segment, &data, OUT_ADDRESS, 4,
1325                 opx->segment, opx->wrt);
1326             offset += 4;
1327             break;
1328
1329         case4(044):
1330             data = opx->offset;
1331             size = ins->addr_size >> 3;
1332             warn_overflow_opd(opx, size);
1333             out(offset, segment, &data, OUT_ADDRESS, size,
1334                 opx->segment, opx->wrt);
1335             offset += size;
1336             break;
1337
1338         case4(050):
1339             if (opx->segment != segment) {
1340                 data = opx->offset;
1341                 out(offset, segment, &data,
1342                     OUT_REL1ADR, insn_end - offset,
1343                     opx->segment, opx->wrt);
1344             } else {
1345                 data = opx->offset - insn_end;
1346                 if (data > 127 || data < -128)
1347                     errfunc(ERR_NONFATAL, "short jump is out of range");
1348                 out(offset, segment, &data,
1349                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1350             }
1351             offset += 1;
1352             break;
1353
1354         case4(054):
1355             data = (int64_t)opx->offset;
1356             out(offset, segment, &data, OUT_ADDRESS, 8,
1357                 opx->segment, opx->wrt);
1358             offset += 8;
1359             break;
1360
1361         case4(060):
1362             if (opx->segment != segment) {
1363                 data = opx->offset;
1364                 out(offset, segment, &data,
1365                     OUT_REL2ADR, insn_end - offset,
1366                     opx->segment, opx->wrt);
1367             } else {
1368                 data = opx->offset - insn_end;
1369                 out(offset, segment, &data,
1370                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1371             }
1372             offset += 2;
1373             break;
1374
1375         case4(064):
1376             if (opx->type & (BITS16 | BITS32 | BITS64))
1377                 size = (opx->type & BITS16) ? 2 : 4;
1378             else
1379                 size = (bits == 16) ? 2 : 4;
1380             if (opx->segment != segment) {
1381                 data = opx->offset;
1382                 out(offset, segment, &data,
1383                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1384                     insn_end - offset, opx->segment, opx->wrt);
1385             } else {
1386                 data = opx->offset - insn_end;
1387                 out(offset, segment, &data,
1388                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1389             }
1390             offset += size;
1391             break;
1392
1393         case4(070):
1394             if (opx->segment != segment) {
1395                 data = opx->offset;
1396                 out(offset, segment, &data,
1397                     OUT_REL4ADR, insn_end - offset,
1398                     opx->segment, opx->wrt);
1399             } else {
1400                 data = opx->offset - insn_end;
1401                 out(offset, segment, &data,
1402                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1403             }
1404             offset += 4;
1405             break;
1406
1407         case4(074):
1408             if (opx->segment == NO_SEG)
1409                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1410                         " relocatable");
1411             data = 0;
1412             out(offset, segment, &data, OUT_ADDRESS, 2,
1413                 outfmt->segbase(1 + opx->segment),
1414                 opx->wrt);
1415             offset += 2;
1416             break;
1417
1418         case4(0140):
1419             data = opx->offset;
1420             warn_overflow_opd(opx, 2);
1421             if (is_sbyte16(opx)) {
1422                 bytes[0] = data;
1423                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1424                     NO_SEG);
1425                 offset++;
1426             } else {
1427                 out(offset, segment, &data, OUT_ADDRESS, 2,
1428                     opx->segment, opx->wrt);
1429                 offset += 2;
1430             }
1431             break;
1432
1433         case4(0144):
1434             EMIT_REX();
1435             bytes[0] = *codes++;
1436             if (is_sbyte16(opx))
1437                 bytes[0] |= 2;  /* s-bit */
1438             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1439             offset++;
1440             break;
1441
1442         case4(0150):
1443             data = opx->offset;
1444             warn_overflow_opd(opx, 4);
1445             if (is_sbyte32(opx)) {
1446                 bytes[0] = data;
1447                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1448                     NO_SEG);
1449                 offset++;
1450             } else {
1451                 out(offset, segment, &data, OUT_ADDRESS, 4,
1452                     opx->segment, opx->wrt);
1453                 offset += 4;
1454             }
1455             break;
1456
1457         case4(0154):
1458             EMIT_REX();
1459             bytes[0] = *codes++;
1460             if (is_sbyte32(opx))
1461                 bytes[0] |= 2;  /* s-bit */
1462             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1463             offset++;
1464             break;
1465
1466         case4(0160):
1467         case4(0164):
1468             break;
1469
1470         case 0171:
1471             bytes[0] =
1472                 (ins->drexdst << 4) |
1473                 (ins->rex & REX_OC ? 0x08 : 0) |
1474                 (ins->rex & (REX_R|REX_X|REX_B));
1475             ins->rex = 0;
1476             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1477             offset++;
1478             break;
1479
1480         case 0172:
1481             c = *codes++;
1482             opx = &ins->oprs[c >> 3];
1483             bytes[0] = nasm_regvals[opx->basereg] << 4;
1484             opx = &ins->oprs[c & 7];
1485             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1486                 errfunc(ERR_NONFATAL,
1487                         "non-absolute expression not permitted as argument %d",
1488                         c & 7);
1489             } else {
1490                 if (opx->offset & ~15) {
1491                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1492                             "four-bit argument exceeds bounds");
1493                 }
1494                 bytes[0] |= opx->offset & 15;
1495             }
1496             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1497             offset++;
1498             break;
1499
1500         case 0173:
1501             c = *codes++;
1502             opx = &ins->oprs[c >> 4];
1503             bytes[0] = nasm_regvals[opx->basereg] << 4;
1504             bytes[0] |= c & 15;
1505             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1506             offset++;
1507             break;
1508
1509         case 0174:
1510             c = *codes++;
1511             opx = &ins->oprs[c];
1512             bytes[0] = nasm_regvals[opx->basereg] << 4;
1513             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1514             offset++;
1515             break;
1516
1517         case4(0250):
1518             data = opx->offset;
1519             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1520                 (int32_t)data != (int64_t)data) {
1521                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1522                         "signed dword immediate exceeds bounds");
1523             }
1524             if (is_sbyte32(opx)) {
1525                 bytes[0] = data;
1526                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1527                     NO_SEG);
1528                 offset++;
1529             } else {
1530                 out(offset, segment, &data, OUT_ADDRESS, 4,
1531                     opx->segment, opx->wrt);
1532                 offset += 4;
1533             }
1534             break;
1535
1536         case4(0254):
1537             data = opx->offset;
1538             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1539                 (int32_t)data != (int64_t)data) {
1540                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1541                         "signed dword immediate exceeds bounds");
1542             }
1543             out(offset, segment, &data, OUT_ADDRESS, 4,
1544                 opx->segment, opx->wrt);
1545             offset += 4;
1546             break;
1547
1548         case4(0260):
1549         case 0270:
1550             codes += 2;
1551             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1552                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1553                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1554                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1555                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1556                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1557                 offset += 3;
1558             } else {
1559                 bytes[0] = 0xc5;
1560                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1561                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1562                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1563                 offset += 2;
1564             }
1565             break;
1566
1567         case4(0274):
1568         {
1569             uint64_t uv, um;
1570             int s;
1571
1572             if (ins->rex & REX_W)
1573                 s = 64;
1574             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1575                 s = 16;
1576             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1577                 s = 32;
1578             else
1579                 s = bits;
1580
1581             um = (uint64_t)2 << (s-1);
1582             uv = opx->offset;
1583
1584             if (uv > 127 && uv < (uint64_t)-128 &&
1585                 (uv < um-128 || uv > um-1)) {
1586                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1587                         "signed byte value exceeds bounds");
1588             }
1589             if (opx->segment != NO_SEG) {
1590                 data = uv;
1591                 out(offset, segment, &data, OUT_ADDRESS, 1,
1592                     opx->segment, opx->wrt);
1593             } else {
1594                 bytes[0] = uv;
1595                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1596                     NO_SEG);
1597             }
1598             offset += 1;
1599             break;
1600         }
1601
1602         case4(0300):
1603             break;
1604
1605         case 0310:
1606             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1607                 *bytes = 0x67;
1608                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1609                 offset += 1;
1610             } else
1611                 offset += 0;
1612             break;
1613
1614         case 0311:
1615             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1616                 *bytes = 0x67;
1617                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1618                 offset += 1;
1619             } else
1620                 offset += 0;
1621             break;
1622
1623         case 0312:
1624             break;
1625
1626         case 0313:
1627             ins->rex = 0;
1628             break;
1629
1630         case4(0314):
1631             break;
1632
1633         case 0320:
1634             if (bits != 16) {
1635                 *bytes = 0x66;
1636                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1637                 offset += 1;
1638             } else
1639                 offset += 0;
1640             break;
1641
1642         case 0321:
1643             if (bits == 16) {
1644                 *bytes = 0x66;
1645                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1646                 offset += 1;
1647             } else
1648                 offset += 0;
1649             break;
1650
1651         case 0322:
1652         case 0323:
1653             break;
1654
1655         case 0324:
1656             ins->rex |= REX_W;
1657             break;
1658
1659         case 0325:
1660             break;
1661
1662         case 0330:
1663             *bytes = *codes++ ^ condval[ins->condition];
1664             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1665             offset += 1;
1666             break;
1667
1668         case 0331:
1669             break;
1670
1671         case 0332:
1672         case 0333:
1673             *bytes = c - 0332 + 0xF2;
1674             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1675             offset += 1;
1676             break;
1677
1678         case 0334:
1679             if (ins->rex & REX_R) {
1680                 *bytes = 0xF0;
1681                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1682                 offset += 1;
1683             }
1684             ins->rex &= ~(REX_L|REX_R);
1685             break;
1686
1687         case 0335:
1688             break;
1689
1690         case 0336:
1691         case 0337:
1692             break;
1693
1694         case 0340:
1695             if (ins->oprs[0].segment != NO_SEG)
1696                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1697             else {
1698                 int64_t size = ins->oprs[0].offset;
1699                 if (size > 0)
1700                     out(offset, segment, NULL,
1701                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1702                 offset += size;
1703             }
1704             break;
1705
1706         case 0341:
1707             break;
1708
1709         case 0344:
1710         case 0345:
1711             bytes[0] = c & 1;
1712             switch (ins->oprs[0].basereg) {
1713             case R_CS:
1714                 bytes[0] += 0x0E;
1715                 break;
1716             case R_DS:
1717                 bytes[0] += 0x1E;
1718                 break;
1719             case R_ES:
1720                 bytes[0] += 0x06;
1721                 break;
1722             case R_SS:
1723                 bytes[0] += 0x16;
1724                 break;
1725             default:
1726                 errfunc(ERR_PANIC,
1727                         "bizarre 8086 segment register received");
1728             }
1729             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1730             offset++;
1731             break;
1732
1733         case 0346:
1734         case 0347:
1735             bytes[0] = c & 1;
1736             switch (ins->oprs[0].basereg) {
1737             case R_FS:
1738                 bytes[0] += 0xA0;
1739                 break;
1740             case R_GS:
1741                 bytes[0] += 0xA8;
1742                 break;
1743             default:
1744                 errfunc(ERR_PANIC,
1745                         "bizarre 386 segment register received");
1746             }
1747             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1748             offset++;
1749             break;
1750
1751         case 0360:
1752             break;
1753
1754         case 0361:
1755             bytes[0] = 0x66;
1756             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1757             offset += 1;
1758             break;
1759
1760         case 0362:
1761         case 0363:
1762             bytes[0] = c - 0362 + 0xf2;
1763             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1764             offset += 1;
1765             break;
1766
1767         case 0364:
1768         case 0365:
1769             break;
1770
1771         case 0366:
1772         case 0367:
1773             *bytes = c - 0366 + 0x66;
1774             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1775             offset += 1;
1776             break;
1777
1778         case 0370:
1779         case 0371:
1780         case 0372:
1781             break;
1782
1783         case 0373:
1784             *bytes = bits == 16 ? 3 : 5;
1785             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1786             offset += 1;
1787             break;
1788
1789         case4(0100):
1790         case4(0110):
1791         case4(0120):
1792         case4(0130):
1793         case4(0200):
1794         case4(0204):
1795         case4(0210):
1796         case4(0214):
1797         case4(0220):
1798         case4(0224):
1799         case4(0230):
1800         case4(0234):
1801             {
1802                 ea ea_data;
1803                 int rfield;
1804                 opflags_t rflags;
1805                 uint8_t *p;
1806                 int32_t s;
1807                 enum out_type type;
1808                 struct operand *opy = &ins->oprs[op2];
1809
1810                 if (c <= 0177) {
1811                     /* pick rfield from operand b (opx) */
1812                     rflags = regflag(opx);
1813                     rfield = nasm_regvals[opx->basereg];
1814                 } else {
1815                     /* rfield is constant */
1816                     rflags = 0;
1817                     rfield = c & 7;
1818                 }
1819
1820                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1821                                 rfield, rflags)) {
1822                     errfunc(ERR_NONFATAL, "invalid effective address");
1823                 }
1824
1825
1826                 p = bytes;
1827                 *p++ = ea_data.modrm;
1828                 if (ea_data.sib_present)
1829                     *p++ = ea_data.sib;
1830
1831                 /* DREX suffixes come between the SIB and the displacement */
1832                 if (ins->rex & REX_D) {
1833                     *p++ = (ins->drexdst << 4) |
1834                            (ins->rex & REX_OC ? 0x08 : 0) |
1835                            (ins->rex & (REX_R|REX_X|REX_B));
1836                     ins->rex = 0;
1837                 }
1838
1839                 s = p - bytes;
1840                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1841
1842                 /*
1843                  * Make sure the address gets the right offset in case
1844                  * the line breaks in the .lst file (BR 1197827)
1845                  */
1846                 offset += s;
1847                 s = 0;
1848
1849                 switch (ea_data.bytes) {
1850                 case 0:
1851                     break;
1852                 case 1:
1853                 case 2:
1854                 case 4:
1855                 case 8:
1856                     data = opy->offset;
1857                     s += ea_data.bytes;
1858                     if (ea_data.rip) {
1859                         if (opy->segment == segment) {
1860                             data -= insn_end;
1861                             if (overflow_signed(data, ea_data.bytes))
1862                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1863                             out(offset, segment, &data, OUT_ADDRESS,
1864                                 ea_data.bytes, NO_SEG, NO_SEG);
1865                         } else {
1866                             /* overflow check in output/linker? */
1867                             out(offset, segment, &data, OUT_REL4ADR,
1868                                 insn_end - offset, opy->segment, opy->wrt);
1869                         }
1870                     } else {
1871                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1872                             signed_bits(opy->offset, ins->addr_size) !=
1873                             signed_bits(opy->offset, ea_data.bytes * 8))
1874                             warn_overflow(ERR_PASS2, ea_data.bytes);
1875
1876                         type = OUT_ADDRESS;
1877                         out(offset, segment, &data, OUT_ADDRESS,
1878                             ea_data.bytes, opy->segment, opy->wrt);
1879                     }
1880                     break;
1881                 default:
1882                     /* Impossible! */
1883                     errfunc(ERR_PANIC,
1884                             "Invalid amount of bytes (%d) for offset?!",
1885                             ea_data.bytes);
1886                     break;
1887                 }
1888                 offset += s;
1889             }
1890             break;
1891
1892         default:
1893             errfunc(ERR_PANIC, "internal instruction table corrupt"
1894                     ": instruction code \\%o (0x%02X) given", c, c);
1895             break;
1896         }
1897     }
1898 }
1899
1900 static opflags_t regflag(const operand * o)
1901 {
1902     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1903         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1904     }
1905     return nasm_reg_flags[o->basereg];
1906 }
1907
1908 static int32_t regval(const operand * o)
1909 {
1910     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1911         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1912     }
1913     return nasm_regvals[o->basereg];
1914 }
1915
1916 static int op_rexflags(const operand * o, int mask)
1917 {
1918     opflags_t flags;
1919     int val;
1920
1921     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1922         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1923     }
1924
1925     flags = nasm_reg_flags[o->basereg];
1926     val = nasm_regvals[o->basereg];
1927
1928     return rexflags(val, flags, mask);
1929 }
1930
1931 static int rexflags(int val, opflags_t flags, int mask)
1932 {
1933     int rex = 0;
1934
1935     if (val >= 8)
1936         rex |= REX_B|REX_X|REX_R;
1937     if (flags & BITS64)
1938         rex |= REX_W;
1939     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1940         rex |= REX_H;
1941     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1942         rex |= REX_P;
1943
1944     return rex & mask;
1945 }
1946
1947 static enum match_result find_match(const struct itemplate **tempp,
1948                                     insn *instruction,
1949                                     int32_t segment, int64_t offset, int bits)
1950 {
1951     const struct itemplate *temp;
1952     enum match_result m, merr;
1953     opflags_t xsizeflags[MAX_OPERANDS];
1954     bool opsizemissing = false;
1955     int i;
1956
1957     for (i = 0; i < instruction->operands; i++)
1958         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1959
1960     merr = MERR_INVALOP;
1961
1962     for (temp = nasm_instructions[instruction->opcode];
1963          temp->opcode != I_none; temp++) {
1964         m = matches(temp, instruction, bits);
1965         if (m == MOK_JUMP) {
1966             if (jmp_match(segment, offset, bits, instruction, temp->code))
1967                 m = MOK_GOOD;
1968             else
1969                 m = MERR_INVALOP;
1970         } else if (m == MERR_OPSIZEMISSING &&
1971                    (temp->flags & IF_SMASK) != IF_SX) {
1972             /*
1973              * Missing operand size and a candidate for fuzzy matching...
1974              */
1975             for (i = 0; i < temp->operands; i++) {
1976                 if ((temp->opd[i] & SAME_AS) == 0)
1977                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1978             }
1979             opsizemissing = true;
1980         }
1981         if (m > merr)
1982             merr = m;
1983         if (merr == MOK_GOOD)
1984             goto done;
1985     }
1986
1987     /* No match, but see if we can get a fuzzy operand size match... */
1988     if (!opsizemissing)
1989         goto done;
1990
1991     for (i = 0; i < instruction->operands; i++) {
1992         /*
1993          * We ignore extrinsic operand sizes on registers, so we should
1994          * never try to fuzzy-match on them.  This also resolves the case
1995          * when we have e.g. "xmmrm128" in two different positions.
1996          */
1997         if (is_class(REGISTER, instruction->oprs[i].type))
1998             continue;
1999
2000         /* This tests if xsizeflags[i] has more than one bit set */
2001         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2002             goto done;          /* No luck */
2003
2004         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2005     }
2006
2007     /* Try matching again... */
2008     for (temp = nasm_instructions[instruction->opcode];
2009          temp->opcode != I_none; temp++) {
2010         m = matches(temp, instruction, bits);
2011         if (m == MOK_JUMP) {
2012             if (jmp_match(segment, offset, bits, instruction, temp->code))
2013                 m = MOK_GOOD;
2014             else
2015                 m = MERR_INVALOP;
2016         }
2017         if (m > merr)
2018             merr = m;
2019         if (merr == MOK_GOOD)
2020             goto done;
2021     }
2022
2023 done:
2024     *tempp = temp;
2025     return merr;
2026 }
2027
2028 static enum match_result matches(const struct itemplate *itemp,
2029                                  insn *instruction, int bits)
2030 {
2031     int i, size[MAX_OPERANDS], asize, oprs;
2032     bool opsizemissing = false;
2033
2034     /*
2035      * Check the opcode
2036      */
2037     if (itemp->opcode != instruction->opcode)
2038         return MERR_INVALOP;
2039
2040     /*
2041      * Count the operands
2042      */
2043     if (itemp->operands != instruction->operands)
2044         return MERR_INVALOP;
2045
2046     /*
2047      * Check that no spurious colons or TOs are present
2048      */
2049     for (i = 0; i < itemp->operands; i++)
2050         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2051             return MERR_INVALOP;
2052
2053     /*
2054      * Process size flags
2055      */
2056     switch (itemp->flags & IF_SMASK) {
2057     case IF_SB:
2058         asize = BITS8;
2059         break;
2060     case IF_SW:
2061         asize = BITS16;
2062         break;
2063     case IF_SD:
2064         asize = BITS32;
2065         break;
2066     case IF_SQ:
2067         asize = BITS64;
2068         break;
2069     case IF_SO:
2070         asize = BITS128;
2071         break;
2072     case IF_SY:
2073         asize = BITS256;
2074         break;
2075     case IF_SZ:
2076         switch (bits) {
2077         case 16:
2078             asize = BITS16;
2079             break;
2080         case 32:
2081             asize = BITS32;
2082             break;
2083         case 64:
2084             asize = BITS64;
2085             break;
2086         default:
2087             asize = 0;
2088             break;
2089         }
2090         break;
2091     default:
2092         asize = 0;
2093         break;
2094     }
2095
2096     if (itemp->flags & IF_ARMASK) {
2097         /* S- flags only apply to a specific operand */
2098         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2099         memset(size, 0, sizeof size);
2100         size[i] = asize;
2101     } else {
2102         /* S- flags apply to all operands */
2103         for (i = 0; i < MAX_OPERANDS; i++)
2104             size[i] = asize;
2105     }
2106
2107     /*
2108      * Check that the operand flags all match up,
2109      * it's a bit tricky so lets be verbose:
2110      *
2111      * 1) Find out the size of operand. If instruction
2112      *    doesn't have one specified -- we're trying to
2113      *    guess it either from template (IF_S* flag) or
2114      *    from code bits.
2115      *
2116      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2117      *    (ie the same operand as was specified somewhere in template, and
2118      *    this referred operand index is being achieved via ~SAME_AS)
2119      *    we are to be sure that both registers (in template and instruction)
2120      *    do exactly match.
2121      *
2122      * 3) If template operand do not match the instruction OR
2123      *    template has an operand size specified AND this size differ
2124      *    from which instruction has (perhaps we got it from code bits)
2125      *    we are:
2126      *      a)  Check that only size of instruction and operand is differ
2127      *          other characteristics do match
2128      *      b)  Perhaps it's a register specified in instruction so
2129      *          for such a case we just mark that operand as "size
2130      *          missing" and this will turn on fuzzy operand size
2131      *          logic facility (handled by a caller)
2132      */
2133     for (i = 0; i < itemp->operands; i++) {
2134         opflags_t type = instruction->oprs[i].type;
2135         if (!(type & SIZE_MASK))
2136             type |= size[i];
2137
2138         if (itemp->opd[i] & SAME_AS) {
2139             int j = itemp->opd[i] & ~SAME_AS;
2140             if (type != instruction->oprs[j].type ||
2141                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2142                 return MERR_INVALOP;
2143         } else if (itemp->opd[i] & ~type ||
2144             ((itemp->opd[i] & SIZE_MASK) &&
2145              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2146             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2147                 return MERR_INVALOP;
2148             } else if (!is_class(REGISTER, type)) {
2149                 /*
2150                  * Note: we don't honor extrinsic operand sizes for registers,
2151                  * so "missing operand size" for a register should be
2152                  * considered a wildcard match rather than an error.
2153                  */
2154                 opsizemissing = true;
2155             }
2156         }
2157     }
2158
2159     if (opsizemissing)
2160         return MERR_OPSIZEMISSING;
2161
2162     /*
2163      * Check operand sizes
2164      */
2165     if (itemp->flags & (IF_SM | IF_SM2)) {
2166         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2167         for (i = 0; i < oprs; i++) {
2168             asize = itemp->opd[i] & SIZE_MASK;
2169             if (asize) {
2170                 for (i = 0; i < oprs; i++)
2171                     size[i] = asize;
2172                 break;
2173             }
2174         }
2175     } else {
2176         oprs = itemp->operands;
2177     }
2178
2179     for (i = 0; i < itemp->operands; i++) {
2180         if (!(itemp->opd[i] & SIZE_MASK) &&
2181             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2182             return MERR_OPSIZEMISMATCH;
2183     }
2184
2185     /*
2186      * Check template is okay at the set cpu level
2187      */
2188     if (((itemp->flags & IF_PLEVEL) > cpu))
2189         return MERR_BADCPU;
2190
2191     /*
2192      * Verify the appropriate long mode flag.
2193      */
2194     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2195         return MERR_BADMODE;
2196
2197     /*
2198      * Check if special handling needed for Jumps
2199      */
2200     if ((itemp->code[0] & 0374) == 0370)
2201         return MOK_JUMP;
2202
2203     return MOK_GOOD;
2204 }
2205
/*
 * process_ea: work out the ModRM byte, the optional SIB byte and the
 * displacement width needed to encode one operand as an effective address.
 *
 * input    - operand to encode.  Its type can be modified: for a pure
 *            offset in 64-bit mode with IP_REL set and no segment, the
 *            IP_REL bits are cleared, MEMORY is set and a warning issued.
 * output   - receives rip, sib_present, sib (when present), modrm, bytes
 *            (displacement width in bytes) and size.  REX flags are OR-ed
 *            into output->rex; this function never clears that field, so
 *            whatever the caller left in it is kept.
 * bits     - current code size; only tested for equality with 64 here.
 * addrbits - address size of the instruction; compared with 16, 32 and 64.
 * rfield   - value for the reg field of ModRM: its low three bits go to
 *            bits 5..3 of the byte, and the full value is also passed to
 *            rexflags() together with rflags.
 * rflags   - register flags belonging to rfield.
 *
 * Returns output on success, or NULL when the operand cannot be encoded.
 * output->rip and output->rex have already been written by the time any
 * NULL return is taken.
 */
2206 static ea *process_ea(operand * input, ea * output, int bits,
2207                       int addrbits, int rfield, opflags_t rflags)
2208 {
         /*
          * True when the operand carries OPFLAG_UNKNOWN.  When set, the
          * displacement-size selection below never picks mod 0, and picks
          * mod 1 only if EAF_BYTEOFFS is given.
          */
2209     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2210
2211     output->rip = false;
2212
2213     /* REX flags for the rfield operand */
2214     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2215
2216     if (is_class(REGISTER, input->type)) {  /* register direct */
2217         int i;
2218         opflags_t f;
2219
2220         if (input->basereg < EXPR_REG_START /* Verify as Register */
2221             || input->basereg >= REG_ENUM_LIMIT)
2222             return NULL;
2223         f = regflag(input);
2224         i = nasm_regvals[input->basereg];
2225
2226         if (REG_EA & ~f)
2227             return NULL;        /* Invalid EA register */
2228
2229         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2230
2231         output->sib_present = false;             /* no SIB necessary */
2232         output->bytes = 0;  /* no offset necessary either */
             /* 0xC0 sets mod = 3; reg = rfield, rm = register number */
2233         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2234     } else {                    /* it's a memory reference */
2235         if (input->basereg == -1
2236             && (input->indexreg == -1 || input->scale == 0)) {
2237             /* it's a pure offset */
2238
                 /*
                  * An IP_REL request on an operand without a segment is
                  * dropped here, with a warning.
                  */
2239             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2240                 input->segment == NO_SEG) {
2241                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2242                 input->type &= ~IP_REL;
2243                 input->type |= MEMORY;
2244             }
2245
                 /*
                  * EAF_BYTEOFFS, or EAF_WORDOFFS with a disp_size other than
                  * the one implied by addrbits, only produces a warning: the
                  * displacement width chosen below depends on addrbits alone.
                  */
2246             if (input->eaflags & EAF_BYTEOFFS ||
2247                 (input->eaflags & EAF_WORDOFFS &&
2248                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2249                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2250             }
2251
                 /*
                  * 64-bit mode without IP_REL: ModRM with mod 0 / rm 4 plus a
                  * SIB byte using index 4 and base 5 -- the same values the
                  * SIB path further down uses when there is no index and no
                  * base register -- followed by a 4-byte displacement.
                  */
2252             if (bits == 64 && (~input->type & IP_REL)) {
2253               int scale, index, base;
2254               output->sib_present = true;
2255               scale = 0;
2256               index = 4;
2257               base = 5;
2258               output->sib = (scale << 6) | (index << 3) | base;
2259               output->bytes = 4;
2260               output->modrm = 4 | ((rfield & 7) << 3);
2261               output->rip = false;
2262             } else {
                   /*
                    * No SIB: mod 0 with rm 6 and a 2-byte displacement when
                    * addrbits is 16, otherwise rm 5 and 4 bytes.  The result
                    * is flagged as RIP-relative whenever bits is 64.
                    */
2263               output->sib_present = false;
2264               output->bytes = (addrbits != 16 ? 4 : 2);
2265               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2266               output->rip = bits == 64;
2267             }
2268         } else {                /* it's an indirection */
2269             int i = input->indexreg, b = input->basereg, s = input->scale;
2270             int32_t seg = input->segment;
                 /* hb/ht: register and kind of the base/index hint */
2271             int hb = input->hintbase, ht = input->hinttype;
2272             int t, it, bt;              /* register numbers */
2273             opflags_t x, ix, bx;        /* register flags */
2274
2275             if (s == 0)
2276                 i = -1;         /* make this easy, at least */
2277
                 /* it/ix and bt/bx stay -1/0 when no valid register is given */
2278             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2279                 it = nasm_regvals[i];
2280                 ix = nasm_reg_flags[i];
2281             } else {
2282                 it = -1;
2283                 ix = 0;
2284             }
2285
2286             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2287                 bt = nasm_regvals[b];
2288                 bx = nasm_reg_flags[b];
2289             } else {
2290                 bt = -1;
2291                 bx = 0;
2292             }
2293
2294             /* check for a 32/64-bit memory reference... */
2295             if ((ix|bx) & (BITS32|BITS64)) {
2296                 /* it must be a 32/64-bit memory reference. Firstly we have
2297                  * to check that all registers involved are type E/Rxx. */
                     /*
                      * sok: the sizes (BITS32/BITS64) still acceptable to every
                      * register looked at so far.
                      * o: input->offset converted to int32_t.
                      */
2298                 int32_t sok = BITS32|BITS64, o = input->offset;
2299
2300                 if (it != -1) {
2301                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2302                         sok &= ix;
2303                     else
2304                         return NULL;
2305                 }
2306
2307                 if (bt != -1) {
2308                     if (REG_GPR & ~bx)
2309                         return NULL; /* Invalid register */
2310                     if (~sok & bx & SIZE_MASK)
2311                         return NULL; /* Invalid size */
2312                     sok &= bx;
2313                 }
2314
2315                 /* While we're here, ensure the user didn't specify
2316                    WORD or QWORD. */
2317                 if (input->disp_size == 16 || input->disp_size == 64)
2318                     return NULL;
2319
                     /* the registers' size must agree with the address size */
2320                 if (addrbits == 16 ||
2321                     (addrbits == 32 && !(sok & BITS32)) ||
2322                     (addrbits == 64 && !(sok & BITS64)))
2323                     return NULL;
2324
2325                 /* now reorganize base/index */
2326                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2327                     ((hb == b && ht == EAH_NOTBASE)
2328                      || (hb == i && ht == EAH_MAKEBASE))) {
2329                     /* swap if hints say so */
2330                     t = bt, bt = it, it = t;
2331                     x = bx, bx = ix, ix = x;
2332                 }
2333                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2334                     bt = -1, bx = 0, s++;
2335                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2336                     /* make single reg base, unless hint */
2337                     bt = it, bx = ix, it = -1, ix = 0;
2338                 }
2339                 if (((s == 2 && it != REG_NUM_ESP
2340                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2341                      || s == 5 || s == 9) && bt == -1)
2342                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2343                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2344                     && (input->eaflags & EAF_TIMESTWO))
2345                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2346                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2347                 if (s == 1 && it == REG_NUM_ESP) {
2348                     /* swap ESP into base if scale is 1 */
2349                     t = it, it = bt, bt = t;
2350                     x = ix, ix = bx, bx = x;
2351                 }
2352                 if (it == REG_NUM_ESP
2353                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2354                     return NULL;        /* wrong, for various reasons */
2355
2356                 output->rex |= rexflags(it, ix, REX_X);
2357                 output->rex |= rexflags(bt, bx, REX_B);
2358
2359                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2360                     /* no SIB needed */
2361                     int mod, rm;
2362
2363                     if (bt == -1) {
2364                         rm = 5;
2365                         mod = 0;
2366                     } else {
2367                         rm = (bt & 7);
                             /*
                              * mod 0: no displacement, mod 1: one byte,
                              * mod 2: four bytes (see output->bytes below).
                              */
2368                         if (rm != REG_NUM_EBP && o == 0 &&
2369                                 seg == NO_SEG && !forw_ref &&
2370                                 !(input->eaflags &
2371                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2372                             mod = 0;
2373                         else if (input->eaflags & EAF_BYTEOFFS ||
2374                                  (o >= -128 && o <= 127 && seg == NO_SEG
2375                                   && !forw_ref
2376                                   && !(input->eaflags & EAF_WORDOFFS)))
2377                             mod = 1;
2378                         else
2379                             mod = 2;
2380                     }
2381
2382                     output->sib_present = false;
                         /* 4 bytes without a base or for mod 2, else mod (0 or 1) */
2383                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2384                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2385                 } else {
2386                     /* we need a SIB */
2387                     int mod, scale, index, base;
2388
2389                     if (it == -1)
2390                         index = 4, s = 1;
2391                     else
2392                         index = (it & 7);
2393
                         /* scale factor 1/2/4/8 is stored as 0..3 */
2394                     switch (s) {
2395                     case 1:
2396                         scale = 0;
2397                         break;
2398                     case 2:
2399                         scale = 1;
2400                         break;
2401                     case 4:
2402                         scale = 2;
2403                         break;
2404                     case 8:
2405                         scale = 3;
2406                         break;
2407                     default:   /* then what the smeg is it? */
2408                         return NULL;    /* panic */
2409                     }
2410
2411                     if (bt == -1) {
2412                         base = 5;
2413                         mod = 0;
2414                     } else {
2415                         base = (bt & 7);
                             /* same mod selection as in the no-SIB case above */
2416                         if (base != REG_NUM_EBP && o == 0 &&
2417                                     seg == NO_SEG && !forw_ref &&
2418                                     !(input->eaflags &
2419                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2420                             mod = 0;
2421                         else if (input->eaflags & EAF_BYTEOFFS ||
2422                                  (o >= -128 && o <= 127 && seg == NO_SEG
2423                                   && !forw_ref
2424                                   && !(input->eaflags & EAF_WORDOFFS)))
2425                             mod = 1;
2426                         else
2427                             mod = 2;
2428                     }
2429
2430                     output->sib_present = true;
2431                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
                         /* rm = 4 in ModRM announces the SIB byte written next */
2432                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2433                     output->sib = (scale << 6) | (index << 3) | base;
2434                 }
2435             } else {            /* it's 16-bit */
2436                 int mod, rm;
                     /* o: input->offset converted to int16_t */
2437                 int16_t o = input->offset;
2438
2439                 /* check for 64-bit long mode */
2440                 if (addrbits == 64)
2441                     return NULL;
2442
2443                 /* check all registers are BX, BP, SI or DI */
2444                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2445                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2446                                        && i != R_SI && i != R_DI))
2447                     return NULL;
2448
2449                 /* ensure the user didn't specify DWORD/QWORD */
2450                 if (input->disp_size == 32 || input->disp_size == 64)
2451                     return NULL;
2452
2453                 if (s != 1 && i != -1)
2454                     return NULL;        /* no can do, in 16-bit EA */
2455                 if (b == -1 && i != -1) {
2456                     int tmp = b;
2457                     b = i;
2458                     i = tmp;
2459                 }               /* swap */
2460                 if ((b == R_SI || b == R_DI) && i != -1) {
2461                     int tmp = b;
2462                     b = i;
2463                     i = tmp;
2464                 }
2465                 /* have BX/BP as base, SI/DI index */
2466                 if (b == i)
2467                     return NULL;        /* shouldn't ever happen, in theory */
2468                 if (i != -1 && b != -1 &&
2469                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2470                     return NULL;        /* invalid combinations */
2471                 if (b == -1)    /* pure offset: handled above */
2472                     return NULL;        /* so if it gets to here, panic! */
2473
                     /*
                      * Map the (index, base) pair, or the lone base register,
                      * to the rm field; rm stays -1 if nothing fits.
                      */
2474                 rm = -1;
2475                 if (i != -1)
2476                     switch (i * 256 + b) {
2477                     case R_SI * 256 + R_BX:
2478                         rm = 0;
2479                         break;
2480                     case R_DI * 256 + R_BX:
2481                         rm = 1;
2482                         break;
2483                     case R_SI * 256 + R_BP:
2484                         rm = 2;
2485                         break;
2486                     case R_DI * 256 + R_BP:
2487                         rm = 3;
2488                         break;
2489                 } else
2490                     switch (b) {
2491                     case R_SI:
2492                         rm = 4;
2493                         break;
2494                     case R_DI:
2495                         rm = 5;
2496                         break;
2497                     case R_BP:
2498                         rm = 6;
2499                         break;
2500                     case R_BX:
2501                         rm = 7;
2502                         break;
2503                     }
2504                 if (rm == -1)   /* can't happen, in theory */
2505                     return NULL;        /* so panic if it does */
2506
                     /* rm 6 is never given mod 0; it always gets a displacement */
2507                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2508                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2509                     mod = 0;
2510                 else if (input->eaflags & EAF_BYTEOFFS ||
2511                          (o >= -128 && o <= 127 && seg == NO_SEG
2512                           && !forw_ref
2513                           && !(input->eaflags & EAF_WORDOFFS)))
2514                     mod = 1;
2515                 else
2516                     mod = 2;
2517
2518                 output->sib_present = false;    /* no SIB - it's 16-bit */
2519                 output->bytes = mod;    /* bytes of offset needed */
2520                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2521             }
2522         }
2523     }
2524
         /* total length: ModRM byte + optional SIB byte + displacement bytes */
2525     output->size = 1 + output->sib_present + output->bytes;
2526     return output;
2527 }
2528
2529 static void add_asp(insn *ins, int addrbits)
2530 {
2531     int j, valid;
2532     int defdisp;
2533
2534     valid = (addrbits == 64) ? 64|32 : 32|16;
2535
2536     switch (ins->prefixes[PPS_ASIZE]) {
2537     case P_A16:
2538         valid &= 16;
2539         break;
2540     case P_A32:
2541         valid &= 32;
2542         break;
2543     case P_A64:
2544         valid &= 64;
2545         break;
2546     case P_ASP:
2547         valid &= (addrbits == 32) ? 16 : 32;
2548         break;
2549     default:
2550         break;
2551     }
2552
2553     for (j = 0; j < ins->operands; j++) {
2554         if (is_class(MEMORY, ins->oprs[j].type)) {
2555             opflags_t i, b;
2556
2557             /* Verify as Register */
2558             if (ins->oprs[j].indexreg < EXPR_REG_START
2559                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2560                 i = 0;
2561             else
2562                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2563
2564             /* Verify as Register */
2565             if (ins->oprs[j].basereg < EXPR_REG_START
2566                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2567                 b = 0;
2568             else
2569                 b = nasm_reg_flags[ins->oprs[j].basereg];
2570
2571             if (ins->oprs[j].scale == 0)
2572                 i = 0;
2573
2574             if (!i && !b) {
2575                 int ds = ins->oprs[j].disp_size;
2576                 if ((addrbits != 64 && ds > 8) ||
2577                     (addrbits == 64 && ds == 16))
2578                     valid &= ds;
2579             } else {
2580                 if (!(REG16 & ~b))
2581                     valid &= 16;
2582                 if (!(REG32 & ~b))
2583                     valid &= 32;
2584                 if (!(REG64 & ~b))
2585                     valid &= 64;
2586
2587                 if (!(REG16 & ~i))
2588                     valid &= 16;
2589                 if (!(REG32 & ~i))
2590                     valid &= 32;
2591                 if (!(REG64 & ~i))
2592                     valid &= 64;
2593             }
2594         }
2595     }
2596
2597     if (valid & addrbits) {
2598         ins->addr_size = addrbits;
2599     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2600         /* Add an address size prefix */
2601         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2602         ins->prefixes[PPS_ASIZE] = pref;
2603         ins->addr_size = (addrbits == 32) ? 16 : 32;
2604     } else {
2605         /* Impossible... */
2606         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2607         ins->addr_size = addrbits; /* Error recovery */
2608     }
2609
2610     defdisp = ins->addr_size == 16 ? 16 : 32;
2611
2612     for (j = 0; j < ins->operands; j++) {
2613         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2614             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2615             != ins->addr_size) {
2616             /* mem_offs sizes must match the address size; if not,
2617                strip the MEM_OFFS bit and match only EA instructions */
2618             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2619         }
2620     }
2621 }