assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 0ww lpp
  96  *                 [w0] ww = 0 for W = 0
  97  *                 [w1] ww = 1 for W = 1
  98  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  99  *                 [ww] ww = 3 for W used as REX.W
 100  *
 101  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 102  *
 103  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 104  *                 which is to be extended to the operand size.
 105  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 106  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 107  * \312          - (disassembler only) invalid with non-default address size.
 108  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 109  * \314          - (disassembler only) invalid with REX.B
 110  * \315          - (disassembler only) invalid with REX.X
 111  * \316          - (disassembler only) invalid with REX.R
 112  * \317          - (disassembler only) invalid with REX.W
 113  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 114  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 115  * \322          - indicates that this instruction is only valid when the
 116  *                 operand size is the default (instruction to disassembler,
 117  *                 generates no code in the assembler)
 118  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 119  * \324          - indicates 64-bit operand size requiring REX prefix.
 120  * \325          - instruction which always uses spl/bpl/sil/dil
 121  * \330          - a literal byte follows in the code stream, to be added
 122  *                 to the condition code value of the instruction.
 123  * \331          - instruction not valid with REP prefix.  Hint for
 124  *                 disassembler only; for SSE instructions.
 125  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 126  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 127  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 128  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  129  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  130  * \337          - force a REPNE prefix (0xF2) even if not specified.
 131  *                 \336-\337 are still listed as prefixes in the disassembler.
 132  * \340          - reserve <operand 0> bytes of uninitialized storage.
 133  *                 Operand 0 had better be a segmentless constant.
 134  * \341          - this instruction needs a WAIT "prefix"
 135  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 136  *                 (POP is never used for CS) depending on operand 0
 137  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 138  *                 on operand 0
 139  * \360          - no SSE prefix (== \364\331)
 140  * \361          - 66 SSE prefix (== \366\331)
 141  * \362          - F2 SSE prefix (== \364\332)
 142  * \363          - F3 SSE prefix (== \364\333)
 143  * \364          - operand-size prefix (0x66) not permitted
 144  * \365          - address-size prefix (0x67) not permitted
 145  * \366          - operand-size prefix (0x66) used as opcode extension
 146  * \367          - address-size prefix (0x67) used as opcode extension
 147  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 148  *                 370 is used for Jcc, 371 is used for JMP.
 149  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 150  *                 used for conditional jump over longer jump
 151  */
 152
 153 #include "compiler.h"
 154
 155 #include <stdio.h>
 156 #include <string.h>
 157 #include <inttypes.h>
 158
 159 #include "nasm.h"
 160 #include "nasmlib.h"
 161 #include "assemble.h"
 162 #include "insns.h"
 163 #include "tables.h"
 164
 165 enum match_result {
 166     /*
 167      * Matching errors.  These should be sorted so that more specific
 168      * errors come later in the sequence.
 169      */
 170     MERR_INVALOP,
 171     MERR_OPSIZEMISSING,
 172     MERR_OPSIZEMISMATCH,
 173     MERR_BADCPU,
 174     MERR_BADMODE,
 175     /*
 176      * Matching success; the conditional ones first
 177      */
 178     MOK_JUMP,                   /* Matching OK but needs jmp_match() */
 179     MOK_GOOD                    /* Matching unconditionally OK */
 180 };
 181
 182 typedef struct {
 183     int sib_present;                 /* is a SIB byte necessary? */
 184     int bytes;                       /* # of bytes of offset needed */
 185     int size;                        /* lazy - this is sib+bytes+1 */
 186     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 187 } ea;
 188
 189 static uint32_t cpu;            /* cpu level received from nasm.c */
 190 static efunc errfunc;
 191 static struct ofmt *outfmt;
 192 static ListGen *list;
 193
 194 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 195 static void gencode(int32_t segment, int64_t offset, int bits,
 196                     insn * ins, const struct itemplate *temp,
 197                     int64_t insn_end);
 198 static enum match_result find_match(const struct itemplate **tempp,
 199                                     insn *instruction,
 200                                     int32_t segment, int64_t offset, int bits);
 201 static enum match_result matches(const struct itemplate *, insn *, int bits);
 202 static opflags_t regflag(const operand *);
 203 static int32_t regval(const operand *);
 204 static int rexflags(int, opflags_t, int);
 205 static int op_rexflags(const operand *, int);
 206 static ea *process_ea(operand *, ea *, int, int, int, opflags_t);
 207 static void add_asp(insn *, int);
 208
 209 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 210 {
 211     return ins->prefixes[pos] == prefix;
 212 }
 213
 214 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 215 {
 216     if (ins->prefixes[pos])
 217         errfunc(ERR_NONFATAL, "invalid %s prefix",
 218                 prefix_name(ins->prefixes[pos]));
 219 }
 220
 221 static const char *size_name(int size)
 222 {
 223     switch (size) {
 224     case 1:
 225         return "byte";
 226     case 2:
 227         return "word";
 228     case 4:
 229         return "dword";
 230     case 8:
 231         return "qword";
 232     case 10:
 233         return "tword";
 234     case 16:
 235         return "oword";
 236     case 32:
 237         return "yword";
 238     default:
 239         return "???";
 240     }
 241 }
 242
 243 static void warn_overflow(int pass, int size)
 244 {
 245     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 246             "%s data exceeds bounds", size_name(size));
 247 }
 248
 249 static void warn_overflow_const(int64_t data, int size)
 250 {
 251     if (overflow_general(data, size))
 252         warn_overflow(ERR_PASS1, size);
 253 }
 254
 255 static void warn_overflow_opd(const struct operand *o, int size)
 256 {
 257     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 258         if (overflow_general(o->offset, size))
 259             warn_overflow(ERR_PASS2, size);
 260     }
 261 }
 262
 263 /*
 264  * This routine wrappers the real output format's output routine,
 265  * in order to pass a copy of the data off to the listing file
 266  * generator at the same time.
 267  */
 268 static void out(int64_t offset, int32_t segto, const void *data,
 269                 enum out_type type, uint64_t size,
 270                 int32_t segment, int32_t wrt)
 271 {
 272     static int32_t lineno = 0;     /* static!!! */
 273     static char *lnfname = NULL;
 274     uint8_t p[8];
 275
 276     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 277         /*
 278          * This is a non-relocated address, and we're going to
 279          * convert it into RAWDATA format.
 280          */
 281         uint8_t *q = p;
 282
 283         if (size > 8) {
 284             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 285             return;
 286         }
 287
 288         WRITEADDR(q, *(int64_t *)data, size);
 289         data = p;
 290         type = OUT_RAWDATA;
 291     }
 292
 293     list->output(offset, data, type, size);
 294
 295     /*
 296      * this call to src_get determines when we call the
 297      * debug-format-specific "linenum" function
 298      * it updates lineno and lnfname to the current values
 299      * returning 0 if "same as last time", -2 if lnfname
 300      * changed, and the amount by which lineno changed,
 301      * if it did. thus, these variables must be static
 302      */
 303
 304     if (src_get(&lineno, &lnfname)) {
 305         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 306     }
 307
 308     outfmt->output(segto, data, type, size, segment, wrt);
 309 }
 310
 311 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 312                      insn * ins, const uint8_t *code)
 313 {
 314     int64_t isize;
 315     uint8_t c = code[0];
 316
 317     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 318         return false;
 319     if (!optimizing)
 320         return false;
 321     if (optimizing < 0 && c == 0371)
 322         return false;
 323
 324     isize = calcsize(segment, offset, bits, ins, code);
 325
 326     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 327         /* Be optimistic in pass 1 */
 328         return true;
 329
 330     if (ins->oprs[0].segment != segment)
 331         return false;
 332
 333     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 334     return (isize >= -128 && isize <= 127); /* is it byte size? */
 335 }
 336
 337 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 338                  insn * instruction, struct ofmt *output, efunc error,
 339                  ListGen * listgen)
 340 {
 341     const struct itemplate *temp;
 342     int j;
 343     enum match_result m;
 344     int64_t insn_end;
 345     int32_t itimes;
 346     int64_t start = offset;
 347     int64_t wsize;              /* size for DB etc. */
 348
 349     errfunc = error;            /* to pass to other functions */
 350     cpu = cp;
 351     outfmt = output;            /* likewise */
 352     list = listgen;             /* and again */
 353
 354     wsize = idata_bytes(instruction->opcode);
 355     if (wsize == -1)
 356         return 0;
 357
 358     if (wsize) {
 359         extop *e;
 360         int32_t t = instruction->times;
 361         if (t < 0)
 362             errfunc(ERR_PANIC,
 363                     "instruction->times < 0 (%ld) in assemble()", t);
 364
 365         while (t--) {           /* repeat TIMES times */
 366             list_for_each(e, instruction->eops) {
 367                 if (e->type == EOT_DB_NUMBER) {
 368                     if (wsize > 8) {
 369                         errfunc(ERR_NONFATAL,
 370                                 "integer supplied to a DT, DO or DY"
 371                                 " instruction");
 372                     } else {
 373                         out(offset, segment, &e->offset,
 374                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 375                         offset += wsize;
 376                     }
 377                 } else if (e->type == EOT_DB_STRING ||
 378                            e->type == EOT_DB_STRING_FREE) {
 379                     int align;
 380
 381                     out(offset, segment, e->stringval,
 382                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 383                     align = e->stringlen % wsize;
 384
 385                     if (align) {
 386                         align = wsize - align;
 387                         out(offset, segment, zero_buffer,
 388                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 389                     }
 390                     offset += e->stringlen + align;
 391                 }
 392             }
 393             if (t > 0 && t == instruction->times - 1) {
 394                 /*
 395                  * Dummy call to list->output to give the offset to the
 396                  * listing module.
 397                  */
 398                 list->output(offset, NULL, OUT_RAWDATA, 0);
 399                 list->uplevel(LIST_TIMES);
 400             }
 401         }
 402         if (instruction->times > 1)
 403             list->downlevel(LIST_TIMES);
 404         return offset - start;
 405     }
 406
 407     if (instruction->opcode == I_INCBIN) {
 408         const char *fname = instruction->eops->stringval;
 409         FILE *fp;
 410
 411         fp = fopen(fname, "rb");
 412         if (!fp) {
 413             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 414                   fname);
 415         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 416             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 417                   fname);
 418         } else {
 419             static char buf[4096];
 420             size_t t = instruction->times;
 421             size_t base = 0;
 422             size_t len;
 423
 424             len = ftell(fp);
 425             if (instruction->eops->next) {
 426                 base = instruction->eops->next->offset;
 427                 len -= base;
 428                 if (instruction->eops->next->next &&
 429                     len > (size_t)instruction->eops->next->next->offset)
 430                     len = (size_t)instruction->eops->next->next->offset;
 431             }
 432             /*
 433              * Dummy call to list->output to give the offset to the
 434              * listing module.
 435              */
 436             list->output(offset, NULL, OUT_RAWDATA, 0);
 437             list->uplevel(LIST_INCBIN);
 438             while (t--) {
 439                 size_t l;
 440
 441                 fseek(fp, base, SEEK_SET);
 442                 l = len;
 443                 while (l > 0) {
 444                     int32_t m;
 445                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 446                     if (!m) {
 447                         /*
 448                          * This shouldn't happen unless the file
 449                          * actually changes while we are reading
 450                          * it.
 451                          */
 452                         error(ERR_NONFATAL,
 453                               "`incbin': unexpected EOF while"
 454                               " reading file `%s'", fname);
 455                         t = 0;  /* Try to exit cleanly */
 456                         break;
 457                     }
 458                     out(offset, segment, buf, OUT_RAWDATA, m,
 459                         NO_SEG, NO_SEG);
 460                     l -= m;
 461                 }
 462             }
 463             list->downlevel(LIST_INCBIN);
 464             if (instruction->times > 1) {
 465                 /*
 466                  * Dummy call to list->output to give the offset to the
 467                  * listing module.
 468                  */
 469                 list->output(offset, NULL, OUT_RAWDATA, 0);
 470                 list->uplevel(LIST_TIMES);
 471                 list->downlevel(LIST_TIMES);
 472             }
 473             fclose(fp);
 474             return instruction->times * len;
 475         }
 476         return 0;               /* if we're here, there's an error */
 477     }
 478
 479     /* Check to see if we need an address-size prefix */
 480     add_asp(instruction, bits);
 481
 482     m = find_match(&temp, instruction, segment, offset, bits);
 483
 484     if (m == MOK_GOOD) {
 485         /* Matches! */
 486         int64_t insn_size = calcsize(segment, offset, bits,
 487                                      instruction, temp->code);
 488         itimes = instruction->times;
 489         if (insn_size < 0)  /* shouldn't be, on pass two */
 490             error(ERR_PANIC, "errors made it through from pass one");
 491         else
 492             while (itimes--) {
 493                 for (j = 0; j < MAXPREFIX; j++) {
 494                     uint8_t c = 0;
 495                     switch (instruction->prefixes[j]) {
 496                     case P_WAIT:
 497                         c = 0x9B;
 498                         break;
 499                     case P_LOCK:
 500                         c = 0xF0;
 501                         break;
 502                     case P_REPNE:
 503                     case P_REPNZ:
 504                         c = 0xF2;
 505                         break;
 506                     case P_REPE:
 507                     case P_REPZ:
 508                     case P_REP:
 509                         c = 0xF3;
 510                         break;
 511                     case R_CS:
 512                         if (bits == 64) {
 513                             error(ERR_WARNING | ERR_PASS2,
 514                                   "cs segment base generated, but will be ignored in 64-bit mode");
 515                         }
 516                         c = 0x2E;
 517                         break;
 518                     case R_DS:
 519                         if (bits == 64) {
 520                             error(ERR_WARNING | ERR_PASS2,
 521                                   "ds segment base generated, but will be ignored in 64-bit mode");
 522                         }
 523                         c = 0x3E;
 524                         break;
 525                     case R_ES:
 526                         if (bits == 64) {
 527                             error(ERR_WARNING | ERR_PASS2,
 528                                   "es segment base generated, but will be ignored in 64-bit mode");
 529                         }
 530                         c = 0x26;
 531                         break;
 532                     case R_FS:
 533                         c = 0x64;
 534                         break;
 535                     case R_GS:
 536                         c = 0x65;
 537                         break;
 538                     case R_SS:
 539                         if (bits == 64) {
 540                             error(ERR_WARNING | ERR_PASS2,
 541                                   "ss segment base generated, but will be ignored in 64-bit mode");
 542                         }
 543                         c = 0x36;
 544                         break;
 545                     case R_SEGR6:
 546                     case R_SEGR7:
 547                         error(ERR_NONFATAL,
 548                               "segr6 and segr7 cannot be used as prefixes");
 549                         break;
 550                     case P_A16:
 551                         if (bits == 64) {
 552                             error(ERR_NONFATAL,
 553                                   "16-bit addressing is not supported "
 554                                   "in 64-bit mode");
 555                         } else if (bits != 16)
 556                             c = 0x67;
 557                         break;
 558                     case P_A32:
 559                         if (bits != 32)
 560                             c = 0x67;
 561                         break;
 562                     case P_A64:
 563                         if (bits != 64) {
 564                             error(ERR_NONFATAL,
 565                                   "64-bit addressing is only supported "
 566                                   "in 64-bit mode");
 567                         }
 568                         break;
 569                     case P_ASP:
 570                         c = 0x67;
 571                         break;
 572                     case P_O16:
 573                         if (bits != 16)
 574                             c = 0x66;
 575                         break;
 576                     case P_O32:
 577                         if (bits == 16)
 578                             c = 0x66;
 579                         break;
 580                     case P_O64:
 581                         /* REX.W */
 582                         break;
 583                     case P_OSP:
 584                         c = 0x66;
 585                         break;
 586                     case P_none:
 587                         break;
 588                     default:
 589                         error(ERR_PANIC, "invalid instruction prefix");
 590                     }
 591                     if (c != 0) {
 592                         out(offset, segment, &c, OUT_RAWDATA, 1,
 593                             NO_SEG, NO_SEG);
 594                         offset++;
 595                     }
 596                 }
 597                 insn_end = offset + insn_size;
 598                 gencode(segment, offset, bits, instruction,
 599                         temp, insn_end);
 600                 offset += insn_size;
 601                 if (itimes > 0 && itimes == instruction->times - 1) {
 602                     /*
 603                      * Dummy call to list->output to give the offset to the
 604                      * listing module.
 605                      */
 606                     list->output(offset, NULL, OUT_RAWDATA, 0);
 607                     list->uplevel(LIST_TIMES);
 608                 }
 609             }
 610         if (instruction->times > 1)
 611             list->downlevel(LIST_TIMES);
 612         return offset - start;
 613     } else {
 614         /* No match */
 615         switch (m) {
 616         case MERR_OPSIZEMISSING:
 617             error(ERR_NONFATAL, "operation size not specified");
 618             break;
 619         case MERR_OPSIZEMISMATCH:
 620             error(ERR_NONFATAL, "mismatch in operand sizes");
 621             break;
 622         case MERR_BADCPU:
 623             error(ERR_NONFATAL, "no instruction for this cpu level");
 624             break;
 625         case MERR_BADMODE:
 626             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 627                   bits);
 628             break;
 629         default:
 630             error(ERR_NONFATAL,
 631                   "invalid combination of opcode and operands");
 632             break;
 633         }
 634     }
 635     return 0;
 636 }
 637
 638 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 639                   insn * instruction, efunc error)
 640 {
 641     const struct itemplate *temp;
 642     enum match_result m;
 643
 644     errfunc = error;            /* to pass to other functions */
 645     cpu = cp;
 646
 647     if (instruction->opcode == I_none)
 648         return 0;
 649
 650     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 651         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 652         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 653         instruction->opcode == I_DY) {
 654         extop *e;
 655         int32_t isize, osize, wsize;
 656
 657         isize = 0;
 658         wsize = idata_bytes(instruction->opcode);
 659
 660         list_for_each(e, instruction->eops) {
 661             int32_t align;
 662
 663             osize = 0;
 664             if (e->type == EOT_DB_NUMBER) {
 665                 osize = 1;
 666                 warn_overflow_const(e->offset, wsize);
 667             } else if (e->type == EOT_DB_STRING ||
 668                        e->type == EOT_DB_STRING_FREE)
 669                 osize = e->stringlen;
 670
 671             align = (-osize) % wsize;
 672             if (align < 0)
 673                 align += wsize;
 674             isize += osize + align;
 675         }
 676         return isize * instruction->times;
 677     }
 678
 679     if (instruction->opcode == I_INCBIN) {
 680         const char *fname = instruction->eops->stringval;
 681         FILE *fp;
 682         int64_t val = 0;
 683         size_t len;
 684
 685         fp = fopen(fname, "rb");
 686         if (!fp)
 687             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 688                   fname);
 689         else if (fseek(fp, 0L, SEEK_END) < 0)
 690             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 691                   fname);
 692         else {
 693             len = ftell(fp);
 694             if (instruction->eops->next) {
 695                 len -= instruction->eops->next->offset;
 696                 if (instruction->eops->next->next &&
 697                     len > (size_t)instruction->eops->next->next->offset) {
 698                     len = (size_t)instruction->eops->next->next->offset;
 699                 }
 700             }
 701             val = instruction->times * len;
 702         }
 703         if (fp)
 704             fclose(fp);
 705         return val;
 706     }
 707
 708     /* Check to see if we need an address-size prefix */
 709     add_asp(instruction, bits);
 710
 711     m = find_match(&temp, instruction, segment, offset, bits);
 712     if (m == MOK_GOOD) {
 713         /* we've matched an instruction. */
 714         int64_t isize;
 715         const uint8_t *codes = temp->code;
 716         int j;
 717
 718         isize = calcsize(segment, offset, bits, instruction, codes);
 719         if (isize < 0)
 720             return -1;
 721         for (j = 0; j < MAXPREFIX; j++) {
 722             switch (instruction->prefixes[j]) {
 723             case P_A16:
 724                 if (bits != 16)
 725                     isize++;
 726                 break;
 727             case P_A32:
 728                 if (bits != 32)
 729                     isize++;
 730                 break;
 731             case P_O16:
 732                 if (bits != 16)
 733                     isize++;
 734                 break;
 735             case P_O32:
 736                 if (bits == 16)
 737                     isize++;
 738                 break;
 739             case P_A64:
 740             case P_O64:
 741             case P_none:
 742                 break;
 743             default:
 744                 isize++;
 745                 break;
 746             }
 747         }
 748         return isize * instruction->times;
 749     } else {
 750         return -1;                  /* didn't match any instruction */
 751     }
 752 }
 753
 754 static bool possible_sbyte(operand *o)
 755 {
 756     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 757         !(o->opflags & OPFLAG_UNKNOWN) &&
 758         optimizing >= 0 && !(o->type & STRICT);
 759 }
 760
 761 /* check that opn[op]  is a signed byte of size 16 or 32 */
 762 static bool is_sbyte16(operand *o)
 763 {
 764     int16_t v;
 765
 766     if (!possible_sbyte(o))
 767         return false;
 768
 769     v = o->offset;
 770     return v >= -128 && v <= 127;
 771 }
 772
 773 static bool is_sbyte32(operand *o)
 774 {
 775     int32_t v;
 776
 777     if (!possible_sbyte(o))
 778         return false;
 779
 780     v = o->offset;
 781     return v >= -128 && v <= 127;
 782 }
 783
 784 /* Common construct */
 785 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 786
 787 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 788                         insn * ins, const uint8_t *codes)
 789 {
 790     int64_t length = 0;
 791     uint8_t c;
 792     int rex_mask = ~0;
 793     int op1, op2;
 794     struct operand *opx;
 795     uint8_t opex = 0;
 796
 797     ins->rex = 0;               /* Ensure REX is reset */
 798
 799     if (ins->prefixes[PPS_OSIZE] == P_O64)
 800         ins->rex |= REX_W;
 801
 802     (void)segment;              /* Don't warn that this parameter is unused */
 803     (void)offset;               /* Don't warn that this parameter is unused */
 804
 805     while (*codes) {
 806         c = *codes++;
 807         op1 = (c & 3) + ((opex & 1) << 2);
 808         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 809         opx = &ins->oprs[op1];
 810         opex = 0;               /* For the next iteration */
 811
 812         switch (c) {
 813         case 01:
 814         case 02:
 815         case 03:
 816         case 04:
 817             codes += c, length += c;
 818             break;
 819
 820         case 05:
 821         case 06:
 822         case 07:
 823             opex = c;
 824             break;
 825
 826         case4(010):
 827             ins->rex |=
 828                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 829             codes++, length++;
 830             break;
 831
 832         case4(014):
 833         case4(020):
 834         case4(024):
 835             length++;
 836             break;
 837
 838         case4(030):
 839             length += 2;
 840             break;
 841
 842         case4(034):
 843             if (opx->type & (BITS16 | BITS32 | BITS64))
 844                 length += (opx->type & BITS16) ? 2 : 4;
 845             else
 846                 length += (bits == 16) ? 2 : 4;
 847             break;
 848
 849         case4(040):
 850             length += 4;
 851             break;
 852
 853         case4(044):
 854             length += ins->addr_size >> 3;
 855             break;
 856
 857         case4(050):
 858             length++;
 859             break;
 860
 861         case4(054):
 862             length += 8; /* MOV reg64/imm */
 863             break;
 864
 865         case4(060):
 866             length += 2;
 867             break;
 868
 869         case4(064):
 870             if (opx->type & (BITS16 | BITS32 | BITS64))
 871                 length += (opx->type & BITS16) ? 2 : 4;
 872             else
 873                 length += (bits == 16) ? 2 : 4;
 874             break;
 875
 876         case4(070):
 877             length += 4;
 878             break;
 879
 880         case4(074):
 881             length += 2;
 882             break;
 883
 884         case4(0140):
 885             length += is_sbyte16(opx) ? 1 : 2;
 886             break;
 887
 888         case4(0144):
 889             codes++;
 890             length++;
 891             break;
 892
 893         case4(0150):
 894             length += is_sbyte32(opx) ? 1 : 4;
 895             break;
 896
 897         case4(0154):
 898             codes++;
 899             length++;
 900             break;
 901
 902         case4(0160):
 903             length++;
 904             ins->rex |= REX_D;
 905             ins->drexdst = regval(opx);
 906             break;
 907
 908         case4(0164):
 909             length++;
 910             ins->rex |= REX_D|REX_OC;
 911             ins->drexdst = regval(opx);
 912             break;
 913
 914         case 0171:
 915             break;
 916
 917         case 0172:
 918         case 0173:
 919         case 0174:
 920             codes++;
 921             length++;
 922             break;
 923
 924         case4(0250):
 925             length += is_sbyte32(opx) ? 1 : 4;
 926             break;
 927
 928         case4(0254):
 929             length += 4;
 930             break;
 931
 932         case4(0260):
 933             ins->rex |= REX_V;
 934             ins->drexdst = regval(opx);
 935             ins->vex_cm = *codes++;
 936             ins->vex_wlp = *codes++;
 937             break;
 938
 939         case 0270:
 940             ins->rex |= REX_V;
 941             ins->drexdst = 0;
 942             ins->vex_cm = *codes++;
 943             ins->vex_wlp = *codes++;
 944             break;
 945
 946         case4(0274):
 947             length++;
 948             break;
 949
 950         case4(0300):
 951             break;
 952
 953         case 0310:
 954             if (bits == 64)
 955                 return -1;
 956             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 957             break;
 958
 959         case 0311:
 960             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 961             break;
 962
 963         case 0312:
 964             break;
 965
 966         case 0313:
 967             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 968                 has_prefix(ins, PPS_ASIZE, P_A32))
 969                 return -1;
 970             break;
 971
 972         case4(0314):
 973             break;
 974
 975         case 0320:
 976             length += (bits != 16);
 977             break;
 978
 979         case 0321:
 980             length += (bits == 16);
 981             break;
 982
 983         case 0322:
 984             break;
 985
 986         case 0323:
 987             rex_mask &= ~REX_W;
 988             break;
 989
 990         case 0324:
 991             ins->rex |= REX_W;
 992             break;
 993
 994         case 0325:
 995             ins->rex |= REX_NH;
 996             break;
 997
 998         case 0330:
 999             codes++, length++;
1000             break;
1001
1002         case 0331:
1003             break;
1004
1005         case 0332:
1006         case 0333:
1007             length++;
1008             break;
1009
1010         case 0334:
1011             ins->rex |= REX_L;
1012             break;
1013
1014         case 0335:
1015             break;
1016
1017         case 0336:
1018             if (!ins->prefixes[PPS_LREP])
1019                 ins->prefixes[PPS_LREP] = P_REP;
1020             break;
1021
1022         case 0337:
1023             if (!ins->prefixes[PPS_LREP])
1024                 ins->prefixes[PPS_LREP] = P_REPNE;
1025             break;
1026
1027         case 0340:
1028             if (ins->oprs[0].segment != NO_SEG)
1029                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1030                         " quantity of BSS space");
1031             else
1032                 length += ins->oprs[0].offset;
1033             break;
1034
1035         case 0341:
1036             if (!ins->prefixes[PPS_WAIT])
1037                 ins->prefixes[PPS_WAIT] = P_WAIT;
1038             break;
1039
1040         case4(0344):
1041             length++;
1042             break;
1043
1044         case 0360:
1045             break;
1046
1047         case 0361:
1048         case 0362:
1049         case 0363:
1050             length++;
1051             break;
1052
1053         case 0364:
1054         case 0365:
1055             break;
1056
1057         case 0366:
1058         case 0367:
1059             length++;
1060             break;
1061
1062         case 0370:
1063         case 0371:
1064         case 0372:
1065             break;
1066
1067         case 0373:
1068             length++;
1069             break;
1070
1071         case4(0100):
1072         case4(0110):
1073         case4(0120):
1074         case4(0130):
1075         case4(0200):
1076         case4(0204):
1077         case4(0210):
1078         case4(0214):
1079         case4(0220):
1080         case4(0224):
1081         case4(0230):
1082         case4(0234):
1083             {
1084                 ea ea_data;
1085                 int rfield;
1086                 opflags_t rflags;
1087                 struct operand *opy = &ins->oprs[op2];
1088
1089                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1090
1091                 if (c <= 0177) {
1092                     /* pick rfield from operand b (opx) */
1093                     rflags = regflag(opx);
1094                     rfield = nasm_regvals[opx->basereg];
1095                 } else {
1096                     rflags = 0;
1097                     rfield = c & 7;
1098                 }
1099                 if (!process_ea(opy, &ea_data, bits,
1100                                 ins->addr_size, rfield, rflags)) {
1101                     errfunc(ERR_NONFATAL, "invalid effective address");
1102                     return -1;
1103                 } else {
1104                     ins->rex |= ea_data.rex;
1105                     length += ea_data.size;
1106                 }
1107             }
1108             break;
1109
1110         default:
1111             errfunc(ERR_PANIC, "internal instruction table corrupt"
1112                     ": instruction code \\%o (0x%02X) given", c, c);
1113             break;
1114         }
1115     }
1116
1117     ins->rex &= rex_mask;
1118
1119     if (ins->rex & REX_NH) {
1120         if (ins->rex & REX_H) {
1121             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1122             return -1;
1123         }
1124         ins->rex &= ~REX_P;     /* Don't force REX prefix due to high reg */
1125     }
1126
1127     if (ins->rex & REX_V) {
1128         int bad32 = REX_R|REX_W|REX_X|REX_B;
1129
1130         if (ins->rex & REX_H) {
1131             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1132             return -1;
1133         }
1134         switch (ins->vex_wlp & 030) {
1135         case 000:
1136         case 020:
1137             ins->rex &= ~REX_W;
1138             break;
1139         case 010:
1140             ins->rex |= REX_W;
1141             bad32 &= ~REX_W;
1142             break;
1143         case 030:
1144             /* Follow REX_W */
1145             break;
1146         }
1147
1148         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1149             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1150             return -1;
1151         }
1152         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1153             length += 3;
1154         else
1155             length += 2;
1156     } else if (ins->rex & REX_D) {
1157         if (ins->rex & REX_H) {
1158             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1159             return -1;
1160         }
1161         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1162                            ins->drexdst > 7)) {
1163             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1164             return -1;
1165         }
1166         length++;
1167     } else if (ins->rex & REX_REAL) {
1168         if (ins->rex & REX_H) {
1169             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1170             return -1;
1171         } else if (bits == 64) {
1172             length++;
1173         } else if ((ins->rex & REX_L) &&
1174                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1175                    cpu >= IF_X86_64) {
1176             /* LOCK-as-REX.R */
1177             assert_no_prefix(ins, PPS_LREP);
1178             length++;
1179         } else {
1180             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1181             return -1;
1182         }
1183     }
1184
1185     return length;
1186 }
1187
/*
 * Emit a pending REX prefix byte, if any.
 *
 * Expands in a context where `ins`, `bits`, `segment` and `offset` are
 * in scope.  When the instruction is not DREX/VEX-encoded, has any of
 * the REX_REAL bits set, and we are in 64-bit mode, one byte (the
 * REX_REAL bits of ins->rex combined with REX_P) is written through
 * out() and `offset` is advanced by one.  ins->rex is then cleared, so
 * a later invocation emits nothing unless rex bits are set again.
 *
 * Wrapped in do { } while (0) so that `EMIT_REX();` behaves as a single
 * statement (no stray empty statement, no dangling-else capture).
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&     \
            (bits == 64)) {                                             \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1,             \
                NO_SEG, NO_SEG);                                        \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1195
1196 static void gencode(int32_t segment, int64_t offset, int bits,
1197                     insn * ins, const struct itemplate *temp,
1198                     int64_t insn_end)
1199 {
1200     static char condval[] = {   /* conditional opcodes */
1201         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1202         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1203         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1204     };
1205     uint8_t c;
1206     uint8_t bytes[4];
1207     int64_t size;
1208     int64_t data;
1209     int op1, op2;
1210     struct operand *opx;
1211     const uint8_t *codes = temp->code;
1212     uint8_t opex = 0;
1213
1214     while (*codes) {
1215         c = *codes++;
1216         op1 = (c & 3) + ((opex & 1) << 2);
1217         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1218         opx = &ins->oprs[op1];
1219         opex = 0;               /* For the next iteration */
1220
1221         switch (c) {
1222         case 01:
1223         case 02:
1224         case 03:
1225         case 04:
1226             EMIT_REX();
1227             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1228             codes += c;
1229             offset += c;
1230             break;
1231
1232         case 05:
1233         case 06:
1234         case 07:
1235             opex = c;
1236             break;
1237
1238         case4(010):
1239             EMIT_REX();
1240             bytes[0] = *codes++ + (regval(opx) & 7);
1241             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1242             offset += 1;
1243             break;
1244
1245         case4(014):
1246             /* The test for BITS8 and SBYTE here is intended to avoid
1247                warning on optimizer actions due to SBYTE, while still
1248                warn on explicit BYTE directives.  Also warn, obviously,
1249                if the optimizer isn't enabled. */
1250             if (((opx->type & BITS8) ||
1251                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1252                 (opx->offset < -128 || opx->offset > 127)) {
1253                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1254                         "signed byte value exceeds bounds");
1255             }
1256             if (opx->segment != NO_SEG) {
1257                 data = opx->offset;
1258                 out(offset, segment, &data, OUT_ADDRESS, 1,
1259                     opx->segment, opx->wrt);
1260             } else {
1261                 bytes[0] = opx->offset;
1262                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1263                     NO_SEG);
1264             }
1265             offset += 1;
1266             break;
1267
1268         case4(020):
1269             if (opx->offset < -256 || opx->offset > 255) {
1270                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1271                         "byte value exceeds bounds");
1272             }
1273             if (opx->segment != NO_SEG) {
1274                 data = opx->offset;
1275                 out(offset, segment, &data, OUT_ADDRESS, 1,
1276                     opx->segment, opx->wrt);
1277             } else {
1278                 bytes[0] = opx->offset;
1279                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1280                     NO_SEG);
1281             }
1282             offset += 1;
1283             break;
1284
1285         case4(024):
1286             if (opx->offset < 0 || opx->offset > 255)
1287                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1288                         "unsigned byte value exceeds bounds");
1289             if (opx->segment != NO_SEG) {
1290                 data = opx->offset;
1291                 out(offset, segment, &data, OUT_ADDRESS, 1,
1292                     opx->segment, opx->wrt);
1293             } else {
1294                 bytes[0] = opx->offset;
1295                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1296                     NO_SEG);
1297             }
1298             offset += 1;
1299             break;
1300
1301         case4(030):
1302             warn_overflow_opd(opx, 2);
1303             data = opx->offset;
1304             out(offset, segment, &data, OUT_ADDRESS, 2,
1305                 opx->segment, opx->wrt);
1306             offset += 2;
1307             break;
1308
1309         case4(034):
1310             if (opx->type & (BITS16 | BITS32))
1311                 size = (opx->type & BITS16) ? 2 : 4;
1312             else
1313                 size = (bits == 16) ? 2 : 4;
1314             warn_overflow_opd(opx, size);
1315             data = opx->offset;
1316             out(offset, segment, &data, OUT_ADDRESS, size,
1317                 opx->segment, opx->wrt);
1318             offset += size;
1319             break;
1320
1321         case4(040):
1322             warn_overflow_opd(opx, 4);
1323             data = opx->offset;
1324             out(offset, segment, &data, OUT_ADDRESS, 4,
1325                 opx->segment, opx->wrt);
1326             offset += 4;
1327             break;
1328
1329         case4(044):
1330             data = opx->offset;
1331             size = ins->addr_size >> 3;
1332             warn_overflow_opd(opx, size);
1333             out(offset, segment, &data, OUT_ADDRESS, size,
1334                 opx->segment, opx->wrt);
1335             offset += size;
1336             break;
1337
1338         case4(050):
1339             if (opx->segment != segment)
1340                 errfunc(ERR_NONFATAL,
1341                         "short relative jump outside segment");
1342             data = opx->offset - insn_end;
1343             if (data > 127 || data < -128)
1344                 errfunc(ERR_NONFATAL, "short jump is out of range");
1345             bytes[0] = data;
1346             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1347             offset += 1;
1348             break;
1349
1350         case4(054):
1351             data = (int64_t)opx->offset;
1352             out(offset, segment, &data, OUT_ADDRESS, 8,
1353                 opx->segment, opx->wrt);
1354             offset += 8;
1355             break;
1356
1357         case4(060):
1358             if (opx->segment != segment) {
1359                 data = opx->offset;
1360                 out(offset, segment, &data,
1361                     OUT_REL2ADR, insn_end - offset,
1362                     opx->segment, opx->wrt);
1363             } else {
1364                 data = opx->offset - insn_end;
1365                 out(offset, segment, &data,
1366                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1367             }
1368             offset += 2;
1369             break;
1370
1371         case4(064):
1372             if (opx->type & (BITS16 | BITS32 | BITS64))
1373                 size = (opx->type & BITS16) ? 2 : 4;
1374             else
1375                 size = (bits == 16) ? 2 : 4;
1376             if (opx->segment != segment) {
1377                 data = opx->offset;
1378                 out(offset, segment, &data,
1379                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1380                     insn_end - offset, opx->segment, opx->wrt);
1381             } else {
1382                 data = opx->offset - insn_end;
1383                 out(offset, segment, &data,
1384                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1385             }
1386             offset += size;
1387             break;
1388
1389         case4(070):
1390             if (opx->segment != segment) {
1391                 data = opx->offset;
1392                 out(offset, segment, &data,
1393                     OUT_REL4ADR, insn_end - offset,
1394                     opx->segment, opx->wrt);
1395             } else {
1396                 data = opx->offset - insn_end;
1397                 out(offset, segment, &data,
1398                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1399             }
1400             offset += 4;
1401             break;
1402
1403         case4(074):
1404             if (opx->segment == NO_SEG)
1405                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1406                         " relocatable");
1407             data = 0;
1408             out(offset, segment, &data, OUT_ADDRESS, 2,
1409                 outfmt->segbase(1 + opx->segment),
1410                 opx->wrt);
1411             offset += 2;
1412             break;
1413
1414         case4(0140):
1415             data = opx->offset;
1416             warn_overflow_opd(opx, 2);
1417             if (is_sbyte16(opx)) {
1418                 bytes[0] = data;
1419                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1420                     NO_SEG);
1421                 offset++;
1422             } else {
1423                 out(offset, segment, &data, OUT_ADDRESS, 2,
1424                     opx->segment, opx->wrt);
1425                 offset += 2;
1426             }
1427             break;
1428
1429         case4(0144):
1430             EMIT_REX();
1431             bytes[0] = *codes++;
1432             if (is_sbyte16(opx))
1433                 bytes[0] |= 2;  /* s-bit */
1434             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1435             offset++;
1436             break;
1437
1438         case4(0150):
1439             data = opx->offset;
1440             warn_overflow_opd(opx, 4);
1441             if (is_sbyte32(opx)) {
1442                 bytes[0] = data;
1443                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1444                     NO_SEG);
1445                 offset++;
1446             } else {
1447                 out(offset, segment, &data, OUT_ADDRESS, 4,
1448                     opx->segment, opx->wrt);
1449                 offset += 4;
1450             }
1451             break;
1452
1453         case4(0154):
1454             EMIT_REX();
1455             bytes[0] = *codes++;
1456             if (is_sbyte32(opx))
1457                 bytes[0] |= 2;  /* s-bit */
1458             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1459             offset++;
1460             break;
1461
1462         case4(0160):
1463         case4(0164):
1464             break;
1465
1466         case 0171:
1467             bytes[0] =
1468                 (ins->drexdst << 4) |
1469                 (ins->rex & REX_OC ? 0x08 : 0) |
1470                 (ins->rex & (REX_R|REX_X|REX_B));
1471             ins->rex = 0;
1472             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1473             offset++;
1474             break;
1475
1476         case 0172:
1477             c = *codes++;
1478             opx = &ins->oprs[c >> 3];
1479             bytes[0] = nasm_regvals[opx->basereg] << 4;
1480             opx = &ins->oprs[c & 7];
1481             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1482                 errfunc(ERR_NONFATAL,
1483                         "non-absolute expression not permitted as argument %d",
1484                         c & 7);
1485             } else {
1486                 if (opx->offset & ~15) {
1487                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1488                             "four-bit argument exceeds bounds");
1489                 }
1490                 bytes[0] |= opx->offset & 15;
1491             }
1492             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1493             offset++;
1494             break;
1495
1496         case 0173:
1497             c = *codes++;
1498             opx = &ins->oprs[c >> 4];
1499             bytes[0] = nasm_regvals[opx->basereg] << 4;
1500             bytes[0] |= c & 15;
1501             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1502             offset++;
1503             break;
1504
1505         case 0174:
1506             c = *codes++;
1507             opx = &ins->oprs[c];
1508             bytes[0] = nasm_regvals[opx->basereg] << 4;
1509             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1510             offset++;
1511             break;
1512
1513         case4(0250):
1514             data = opx->offset;
1515             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1516                 (int32_t)data != (int64_t)data) {
1517                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1518                         "signed dword immediate exceeds bounds");
1519             }
1520             if (is_sbyte32(opx)) {
1521                 bytes[0] = data;
1522                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1523                     NO_SEG);
1524                 offset++;
1525             } else {
1526                 out(offset, segment, &data, OUT_ADDRESS, 4,
1527                     opx->segment, opx->wrt);
1528                 offset += 4;
1529             }
1530             break;
1531
1532         case4(0254):
1533             data = opx->offset;
1534             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1535                 (int32_t)data != (int64_t)data) {
1536                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1537                         "signed dword immediate exceeds bounds");
1538             }
1539             out(offset, segment, &data, OUT_ADDRESS, 4,
1540                 opx->segment, opx->wrt);
1541             offset += 4;
1542             break;
1543
1544         case4(0260):
1545         case 0270:
1546             codes += 2;
1547             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1548                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1549                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1550                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1551                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1552                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1553                 offset += 3;
1554             } else {
1555                 bytes[0] = 0xc5;
1556                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1557                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1558                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1559                 offset += 2;
1560             }
1561             break;
1562
1563         case4(0274):
1564         {
1565             uint64_t uv, um;
1566             int s;
1567
1568             if (ins->rex & REX_W)
1569                 s = 64;
1570             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1571                 s = 16;
1572             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1573                 s = 32;
1574             else
1575                 s = bits;
1576
1577             um = (uint64_t)2 << (s-1);
1578             uv = opx->offset;
1579
1580             if (uv > 127 && uv < (uint64_t)-128 &&
1581                 (uv < um-128 || uv > um-1)) {
1582                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1583                         "signed byte value exceeds bounds");
1584             }
1585             if (opx->segment != NO_SEG) {
1586                 data = uv;
1587                 out(offset, segment, &data, OUT_ADDRESS, 1,
1588                     opx->segment, opx->wrt);
1589             } else {
1590                 bytes[0] = uv;
1591                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1592                     NO_SEG);
1593             }
1594             offset += 1;
1595             break;
1596         }
1597
1598         case4(0300):
1599             break;
1600
1601         case 0310:
1602             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1603                 *bytes = 0x67;
1604                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1605                 offset += 1;
1606             } else
1607                 offset += 0;
1608             break;
1609
1610         case 0311:
1611             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1612                 *bytes = 0x67;
1613                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1614                 offset += 1;
1615             } else
1616                 offset += 0;
1617             break;
1618
1619         case 0312:
1620             break;
1621
1622         case 0313:
1623             ins->rex = 0;
1624             break;
1625
1626         case4(0314):
1627             break;
1628
1629         case 0320:
1630             if (bits != 16) {
1631                 *bytes = 0x66;
1632                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1633                 offset += 1;
1634             } else
1635                 offset += 0;
1636             break;
1637
1638         case 0321:
1639             if (bits == 16) {
1640                 *bytes = 0x66;
1641                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1642                 offset += 1;
1643             } else
1644                 offset += 0;
1645             break;
1646
1647         case 0322:
1648         case 0323:
1649             break;
1650
1651         case 0324:
1652             ins->rex |= REX_W;
1653             break;
1654
1655         case 0325:
1656             break;
1657
1658         case 0330:
1659             *bytes = *codes++ ^ condval[ins->condition];
1660             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1661             offset += 1;
1662             break;
1663
1664         case 0331:
1665             break;
1666
1667         case 0332:
1668         case 0333:
1669             *bytes = c - 0332 + 0xF2;
1670             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1671             offset += 1;
1672             break;
1673
1674         case 0334:
1675             if (ins->rex & REX_R) {
1676                 *bytes = 0xF0;
1677                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1678                 offset += 1;
1679             }
1680             ins->rex &= ~(REX_L|REX_R);
1681             break;
1682
1683         case 0335:
1684             break;
1685
1686         case 0336:
1687         case 0337:
1688             break;
1689
1690         case 0340:
1691             if (ins->oprs[0].segment != NO_SEG)
1692                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1693             else {
1694                 int64_t size = ins->oprs[0].offset;
1695                 if (size > 0)
1696                     out(offset, segment, NULL,
1697                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1698                 offset += size;
1699             }
1700             break;
1701
1702         case 0341:
1703             break;
1704
1705         case 0344:
1706         case 0345:
1707             bytes[0] = c & 1;
1708             switch (ins->oprs[0].basereg) {
1709             case R_CS:
1710                 bytes[0] += 0x0E;
1711                 break;
1712             case R_DS:
1713                 bytes[0] += 0x1E;
1714                 break;
1715             case R_ES:
1716                 bytes[0] += 0x06;
1717                 break;
1718             case R_SS:
1719                 bytes[0] += 0x16;
1720                 break;
1721             default:
1722                 errfunc(ERR_PANIC,
1723                         "bizarre 8086 segment register received");
1724             }
1725             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1726             offset++;
1727             break;
1728
1729         case 0346:
1730         case 0347:
1731             bytes[0] = c & 1;
1732             switch (ins->oprs[0].basereg) {
1733             case R_FS:
1734                 bytes[0] += 0xA0;
1735                 break;
1736             case R_GS:
1737                 bytes[0] += 0xA8;
1738                 break;
1739             default:
1740                 errfunc(ERR_PANIC,
1741                         "bizarre 386 segment register received");
1742             }
1743             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1744             offset++;
1745             break;
1746
1747         case 0360:
1748             break;
1749
1750         case 0361:
1751             bytes[0] = 0x66;
1752             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1753             offset += 1;
1754             break;
1755
1756         case 0362:
1757         case 0363:
1758             bytes[0] = c - 0362 + 0xf2;
1759             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1760             offset += 1;
1761             break;
1762
1763         case 0364:
1764         case 0365:
1765             break;
1766
1767         case 0366:
1768         case 0367:
1769             *bytes = c - 0366 + 0x66;
1770             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1771             offset += 1;
1772             break;
1773
1774         case 0370:
1775         case 0371:
1776         case 0372:
1777             break;
1778
1779         case 0373:
1780             *bytes = bits == 16 ? 3 : 5;
1781             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1782             offset += 1;
1783             break;
1784
1785         case4(0100):
1786         case4(0110):
1787         case4(0120):
1788         case4(0130):
1789         case4(0200):
1790         case4(0204):
1791         case4(0210):
1792         case4(0214):
1793         case4(0220):
1794         case4(0224):
1795         case4(0230):
1796         case4(0234):
1797             {
1798                 ea ea_data;
1799                 int rfield;
1800                 opflags_t rflags;
1801                 uint8_t *p;
1802                 int32_t s;
1803                 enum out_type type;
1804                 struct operand *opy = &ins->oprs[op2];
1805
1806                 if (c <= 0177) {
1807                     /* pick rfield from operand b (opx) */
1808                     rflags = regflag(opx);
1809                     rfield = nasm_regvals[opx->basereg];
1810                 } else {
1811                     /* rfield is constant */
1812                     rflags = 0;
1813                     rfield = c & 7;
1814                 }
1815
1816                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1817                                 rfield, rflags)) {
1818                     errfunc(ERR_NONFATAL, "invalid effective address");
1819                 }
1820
1821
1822                 p = bytes;
1823                 *p++ = ea_data.modrm;
1824                 if (ea_data.sib_present)
1825                     *p++ = ea_data.sib;
1826
1827                 /* DREX suffixes come between the SIB and the displacement */
1828                 if (ins->rex & REX_D) {
1829                     *p++ = (ins->drexdst << 4) |
1830                            (ins->rex & REX_OC ? 0x08 : 0) |
1831                            (ins->rex & (REX_R|REX_X|REX_B));
1832                     ins->rex = 0;
1833                 }
1834
1835                 s = p - bytes;
1836                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1837
1838                 /*
1839                  * Make sure the address gets the right offset in case
1840                  * the line breaks in the .lst file (BR 1197827)
1841                  */
1842                 offset += s;
1843                 s = 0;
1844
1845                 switch (ea_data.bytes) {
1846                 case 0:
1847                     break;
1848                 case 1:
1849                 case 2:
1850                 case 4:
1851                 case 8:
1852                     data = opy->offset;
1853                     s += ea_data.bytes;
1854                     if (ea_data.rip) {
1855                         if (opy->segment == segment) {
1856                             data -= insn_end;
1857                             if (overflow_signed(data, ea_data.bytes))
1858                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1859                             out(offset, segment, &data, OUT_ADDRESS,
1860                                 ea_data.bytes, NO_SEG, NO_SEG);
1861                         } else {
1862                             /* overflow check in output/linker? */
1863                             out(offset, segment, &data, OUT_REL4ADR,
1864                                 insn_end - offset, opy->segment, opy->wrt);
1865                         }
1866                     } else {
1867                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1868                             signed_bits(opy->offset, ins->addr_size) !=
1869                             signed_bits(opy->offset, ea_data.bytes * 8))
1870                             warn_overflow(ERR_PASS2, ea_data.bytes);
1871
1872                         type = OUT_ADDRESS;
1873                         out(offset, segment, &data, OUT_ADDRESS,
1874                             ea_data.bytes, opy->segment, opy->wrt);
1875                     }
1876                     break;
1877                 default:
1878                     /* Impossible! */
1879                     errfunc(ERR_PANIC,
1880                             "Invalid amount of bytes (%d) for offset?!",
1881                             ea_data.bytes);
1882                     break;
1883                 }
1884                 offset += s;
1885             }
1886             break;
1887
1888         default:
1889             errfunc(ERR_PANIC, "internal instruction table corrupt"
1890                     ": instruction code \\%o (0x%02X) given", c, c);
1891             break;
1892         }
1893     }
1894 }
1895
1896 static opflags_t regflag(const operand * o)
1897 {
1898     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1899         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1900     }
1901     return nasm_reg_flags[o->basereg];
1902 }
1903
1904 static int32_t regval(const operand * o)
1905 {
1906     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1907         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1908     }
1909     return nasm_regvals[o->basereg];
1910 }
1911
1912 static int op_rexflags(const operand * o, int mask)
1913 {
1914     opflags_t flags;
1915     int val;
1916
1917     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1918         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1919     }
1920
1921     flags = nasm_reg_flags[o->basereg];
1922     val = nasm_regvals[o->basereg];
1923
1924     return rexflags(val, flags, mask);
1925 }
1926
1927 static int rexflags(int val, opflags_t flags, int mask)
1928 {
1929     int rex = 0;
1930
1931     if (val >= 8)
1932         rex |= REX_B|REX_X|REX_R;
1933     if (flags & BITS64)
1934         rex |= REX_W;
1935     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1936         rex |= REX_H;
1937     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1938         rex |= REX_P;
1939
1940     return rex & mask;
1941 }
1942
1943 static enum match_result find_match(const struct itemplate **tempp,
1944                                     insn *instruction,
1945                                     int32_t segment, int64_t offset, int bits)
1946 {
1947     const struct itemplate *temp;
1948     enum match_result m, merr;
1949     opflags_t xsizeflags[MAX_OPERANDS];
1950     bool opsizemissing = false;
1951     int i;
1952
1953     for (i = 0; i < instruction->operands; i++)
1954         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1955
1956     merr = MERR_INVALOP;
1957
1958     for (temp = nasm_instructions[instruction->opcode];
1959          temp->opcode != I_none; temp++) {
1960         m = matches(temp, instruction, bits);
1961         if (m == MOK_JUMP) {
1962             if (jmp_match(segment, offset, bits, instruction, temp->code))
1963                 m = MOK_GOOD;
1964             else
1965                 m = MERR_INVALOP;
1966         } else if (m == MERR_OPSIZEMISSING &&
1967                    (temp->flags & IF_SMASK) != IF_SX) {
1968             /*
1969              * Missing operand size and a candidate for fuzzy matching...
1970              */
1971             for (i = 0; i < temp->operands; i++) {
1972                 if ((temp->opd[i] & SAME_AS) == 0)
1973                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1974             }
1975             opsizemissing = true;
1976         }
1977         if (m > merr)
1978             merr = m;
1979         if (merr == MOK_GOOD)
1980             goto done;
1981     }
1982
1983     /* No match, but see if we can get a fuzzy operand size match... */
1984     if (!opsizemissing)
1985         goto done;
1986
1987     for (i = 0; i < instruction->operands; i++) {
1988         /*
1989          * We ignore extrinsic operand sizes on registers, so we should
1990          * never try to fuzzy-match on them.  This also resolves the case
1991          * when we have e.g. "xmmrm128" in two different positions.
1992          */
1993         if (is_class(REGISTER, instruction->oprs[i].type))
1994             continue;
1995
1996         /* This tests if xsizeflags[i] has more than one bit set */
1997         if ((xsizeflags[i] & (xsizeflags[i]-1)))
1998             goto done;          /* No luck */
1999
2000         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2001     }
2002
2003     /* Try matching again... */
2004     for (temp = nasm_instructions[instruction->opcode];
2005          temp->opcode != I_none; temp++) {
2006         m = matches(temp, instruction, bits);
2007         if (m == MOK_JUMP) {
2008             if (jmp_match(segment, offset, bits, instruction, temp->code))
2009                 m = MOK_GOOD;
2010             else
2011                 m = MERR_INVALOP;
2012         }
2013         if (m > merr)
2014             merr = m;
2015         if (merr == MOK_GOOD)
2016             goto done;
2017     }
2018
2019 done:
2020     *tempp = temp;
2021     return merr;
2022 }
2023
2024 static enum match_result matches(const struct itemplate *itemp,
2025                                  insn *instruction, int bits)
2026 {
2027     int i, size[MAX_OPERANDS], asize, oprs;
2028     bool opsizemissing = false;
2029
2030     /*
2031      * Check the opcode
2032      */
2033     if (itemp->opcode != instruction->opcode)
2034         return MERR_INVALOP;
2035
2036     /*
2037      * Count the operands
2038      */
2039     if (itemp->operands != instruction->operands)
2040         return MERR_INVALOP;
2041
2042     /*
2043      * Check that no spurious colons or TOs are present
2044      */
2045     for (i = 0; i < itemp->operands; i++)
2046         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2047             return MERR_INVALOP;
2048
2049     /*
2050      * Process size flags
2051      */
2052     switch (itemp->flags & IF_SMASK) {
2053     case IF_SB:
2054         asize = BITS8;
2055         break;
2056     case IF_SW:
2057         asize = BITS16;
2058         break;
2059     case IF_SD:
2060         asize = BITS32;
2061         break;
2062     case IF_SQ:
2063         asize = BITS64;
2064         break;
2065     case IF_SO:
2066         asize = BITS128;
2067         break;
2068     case IF_SY:
2069         asize = BITS256;
2070         break;
2071     case IF_SZ:
2072         switch (bits) {
2073         case 16:
2074             asize = BITS16;
2075             break;
2076         case 32:
2077             asize = BITS32;
2078             break;
2079         case 64:
2080             asize = BITS64;
2081             break;
2082         default:
2083             asize = 0;
2084             break;
2085         }
2086         break;
2087     default:
2088         asize = 0;
2089         break;
2090     }
2091
2092     if (itemp->flags & IF_ARMASK) {
2093         /* S- flags only apply to a specific operand */
2094         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2095         memset(size, 0, sizeof size);
2096         size[i] = asize;
2097     } else {
2098         /* S- flags apply to all operands */
2099         for (i = 0; i < MAX_OPERANDS; i++)
2100             size[i] = asize;
2101     }
2102
2103     /*
2104      * Check that the operand flags all match up,
2105      * it's a bit tricky so lets be verbose:
2106      *
2107      * 1) Find out the size of operand. If instruction
2108      *    doesn't have one specified -- we're trying to
2109      *    guess it either from template (IF_S* flag) or
2110      *    from code bits.
2111      *
2112      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2113      *    (ie the same operand as was specified somewhere in template, and
2114      *    this referred operand index is being achieved via ~SAME_AS)
2115      *    we are to be sure that both registers (in template and instruction)
2116      *    do exactly match.
2117      *
2118      * 3) If template operand do not match the instruction OR
2119      *    template has an operand size specified AND this size differ
2120      *    from which instruction has (perhaps we got it from code bits)
2121      *    we are:
2122      *      a)  Check that only size of instruction and operand is differ
2123      *          other characteristics do match
2124      *      b)  Perhaps it's a register specified in instruction so
2125      *          for such a case we just mark that operand as "size
2126      *          missing" and this will turn on fuzzy operand size
2127      *          logic facility (handled by a caller)
2128      */
2129     for (i = 0; i < itemp->operands; i++) {
2130         opflags_t type = instruction->oprs[i].type;
2131         if (!(type & SIZE_MASK))
2132             type |= size[i];
2133
2134         if (itemp->opd[i] & SAME_AS) {
2135             int j = itemp->opd[i] & ~SAME_AS;
2136             if (type != instruction->oprs[j].type ||
2137                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2138                 return MERR_INVALOP;
2139         } else if (itemp->opd[i] & ~type ||
2140             ((itemp->opd[i] & SIZE_MASK) &&
2141              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2142             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2143                 return MERR_INVALOP;
2144             } else if (!is_class(REGISTER, type)) {
2145                 /*
2146                  * Note: we don't honor extrinsic operand sizes for registers,
2147                  * so "missing operand size" for a register should be
2148                  * considered a wildcard match rather than an error.
2149                  */
2150                 opsizemissing = true;
2151             }
2152         }
2153     }
2154
2155     if (opsizemissing)
2156         return MERR_OPSIZEMISSING;
2157
2158     /*
2159      * Check operand sizes
2160      */
2161     if (itemp->flags & (IF_SM | IF_SM2)) {
2162         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2163         for (i = 0; i < oprs; i++) {
2164             asize = itemp->opd[i] & SIZE_MASK;
2165             if (asize) {
2166                 for (i = 0; i < oprs; i++)
2167                     size[i] = asize;
2168                 break;
2169             }
2170         }
2171     } else {
2172         oprs = itemp->operands;
2173     }
2174
2175     for (i = 0; i < itemp->operands; i++) {
2176         if (!(itemp->opd[i] & SIZE_MASK) &&
2177             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2178             return MERR_OPSIZEMISMATCH;
2179     }
2180
2181     /*
2182      * Check template is okay at the set cpu level
2183      */
2184     if (((itemp->flags & IF_PLEVEL) > cpu))
2185         return MERR_BADCPU;
2186
2187     /*
2188      * Verify the appropriate long mode flag.
2189      */
2190     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2191         return MERR_BADMODE;
2192
2193     /*
2194      * Check if special handling needed for Jumps
2195      */
2196     if ((itemp->code[0] & 0374) == 0370)
2197         return MOK_JUMP;
2198
2199     return MOK_GOOD;
2200 }
2201
2202 static ea *process_ea(operand * input, ea * output, int bits,
2203                       int addrbits, int rfield, opflags_t rflags)
2204 {
2205     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2206
2207     output->rip = false;
2208
2209     /* REX flags for the rfield operand */
2210     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2211
2212     if (is_class(REGISTER, input->type)) {  /* register direct */
2213         int i;
2214         opflags_t f;
2215
2216         if (input->basereg < EXPR_REG_START /* Verify as Register */
2217             || input->basereg >= REG_ENUM_LIMIT)
2218             return NULL;
2219         f = regflag(input);
2220         i = nasm_regvals[input->basereg];
2221
2222         if (REG_EA & ~f)
2223             return NULL;        /* Invalid EA register */
2224
2225         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2226
2227         output->sib_present = false;             /* no SIB necessary */
2228         output->bytes = 0;  /* no offset necessary either */
2229         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2230     } else {                    /* it's a memory reference */
2231         if (input->basereg == -1
2232             && (input->indexreg == -1 || input->scale == 0)) {
2233             /* it's a pure offset */
2234
2235             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2236                 input->segment == NO_SEG) {
2237                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2238                 input->type &= ~IP_REL;
2239                 input->type |= MEMORY;
2240             }
2241
2242             if (input->eaflags & EAF_BYTEOFFS ||
2243                 (input->eaflags & EAF_WORDOFFS &&
2244                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2245                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2246             }
2247
2248             if (bits == 64 && (~input->type & IP_REL)) {
2249               int scale, index, base;
2250               output->sib_present = true;
2251               scale = 0;
2252               index = 4;
2253               base = 5;
2254               output->sib = (scale << 6) | (index << 3) | base;
2255               output->bytes = 4;
2256               output->modrm = 4 | ((rfield & 7) << 3);
2257               output->rip = false;
2258             } else {
2259               output->sib_present = false;
2260               output->bytes = (addrbits != 16 ? 4 : 2);
2261               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2262               output->rip = bits == 64;
2263             }
2264         } else {                /* it's an indirection */
2265             int i = input->indexreg, b = input->basereg, s = input->scale;
2266             int32_t seg = input->segment;
2267             int hb = input->hintbase, ht = input->hinttype;
2268             int t, it, bt;              /* register numbers */
2269             opflags_t x, ix, bx;        /* register flags */
2270
2271             if (s == 0)
2272                 i = -1;         /* make this easy, at least */
2273
2274             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2275                 it = nasm_regvals[i];
2276                 ix = nasm_reg_flags[i];
2277             } else {
2278                 it = -1;
2279                 ix = 0;
2280             }
2281
2282             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2283                 bt = nasm_regvals[b];
2284                 bx = nasm_reg_flags[b];
2285             } else {
2286                 bt = -1;
2287                 bx = 0;
2288             }
2289
2290             /* check for a 32/64-bit memory reference... */
2291             if ((ix|bx) & (BITS32|BITS64)) {
2292                 /* it must be a 32/64-bit memory reference. Firstly we have
2293                  * to check that all registers involved are type E/Rxx. */
2294                 int32_t sok = BITS32|BITS64, o = input->offset;
2295
2296                 if (it != -1) {
2297                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2298                         sok &= ix;
2299                     else
2300                         return NULL;
2301                 }
2302
2303                 if (bt != -1) {
2304                     if (REG_GPR & ~bx)
2305                         return NULL; /* Invalid register */
2306                     if (~sok & bx & SIZE_MASK)
2307                         return NULL; /* Invalid size */
2308                     sok &= bx;
2309                 }
2310
2311                 /* While we're here, ensure the user didn't specify
2312                    WORD or QWORD. */
2313                 if (input->disp_size == 16 || input->disp_size == 64)
2314                     return NULL;
2315
2316                 if (addrbits == 16 ||
2317                     (addrbits == 32 && !(sok & BITS32)) ||
2318                     (addrbits == 64 && !(sok & BITS64)))
2319                     return NULL;
2320
2321                 /* now reorganize base/index */
2322                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2323                     ((hb == b && ht == EAH_NOTBASE)
2324                      || (hb == i && ht == EAH_MAKEBASE))) {
2325                     /* swap if hints say so */
2326                     t = bt, bt = it, it = t;
2327                     x = bx, bx = ix, ix = x;
2328                 }
2329                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2330                     bt = -1, bx = 0, s++;
2331                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2332                     /* make single reg base, unless hint */
2333                     bt = it, bx = ix, it = -1, ix = 0;
2334                 }
2335                 if (((s == 2 && it != REG_NUM_ESP
2336                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2337                      || s == 5 || s == 9) && bt == -1)
2338                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2339                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2340                     && (input->eaflags & EAF_TIMESTWO))
2341                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2342                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2343                 if (s == 1 && it == REG_NUM_ESP) {
2344                     /* swap ESP into base if scale is 1 */
2345                     t = it, it = bt, bt = t;
2346                     x = ix, ix = bx, bx = x;
2347                 }
2348                 if (it == REG_NUM_ESP
2349                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2350                     return NULL;        /* wrong, for various reasons */
2351
2352                 output->rex |= rexflags(it, ix, REX_X);
2353                 output->rex |= rexflags(bt, bx, REX_B);
2354
2355                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2356                     /* no SIB needed */
2357                     int mod, rm;
2358
2359                     if (bt == -1) {
2360                         rm = 5;
2361                         mod = 0;
2362                     } else {
2363                         rm = (bt & 7);
2364                         if (rm != REG_NUM_EBP && o == 0 &&
2365                                 seg == NO_SEG && !forw_ref &&
2366                                 !(input->eaflags &
2367                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2368                             mod = 0;
2369                         else if (input->eaflags & EAF_BYTEOFFS ||
2370                                  (o >= -128 && o <= 127 && seg == NO_SEG
2371                                   && !forw_ref
2372                                   && !(input->eaflags & EAF_WORDOFFS)))
2373                             mod = 1;
2374                         else
2375                             mod = 2;
2376                     }
2377
2378                     output->sib_present = false;
2379                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2380                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2381                 } else {
2382                     /* we need a SIB */
2383                     int mod, scale, index, base;
2384
2385                     if (it == -1)
2386                         index = 4, s = 1;
2387                     else
2388                         index = (it & 7);
2389
2390                     switch (s) {
2391                     case 1:
2392                         scale = 0;
2393                         break;
2394                     case 2:
2395                         scale = 1;
2396                         break;
2397                     case 4:
2398                         scale = 2;
2399                         break;
2400                     case 8:
2401                         scale = 3;
2402                         break;
2403                     default:   /* then what the smeg is it? */
2404                         return NULL;    /* panic */
2405                     }
2406
2407                     if (bt == -1) {
2408                         base = 5;
2409                         mod = 0;
2410                     } else {
2411                         base = (bt & 7);
2412                         if (base != REG_NUM_EBP && o == 0 &&
2413                                     seg == NO_SEG && !forw_ref &&
2414                                     !(input->eaflags &
2415                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2416                             mod = 0;
2417                         else if (input->eaflags & EAF_BYTEOFFS ||
2418                                  (o >= -128 && o <= 127 && seg == NO_SEG
2419                                   && !forw_ref
2420                                   && !(input->eaflags & EAF_WORDOFFS)))
2421                             mod = 1;
2422                         else
2423                             mod = 2;
2424                     }
2425
2426                     output->sib_present = true;
2427                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2428                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2429                     output->sib = (scale << 6) | (index << 3) | base;
2430                 }
2431             } else {            /* it's 16-bit */
2432                 int mod, rm;
2433                 int16_t o = input->offset;
2434
2435                 /* check for 64-bit long mode */
2436                 if (addrbits == 64)
2437                     return NULL;
2438
2439                 /* check all registers are BX, BP, SI or DI */
2440                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2441                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2442                                        && i != R_SI && i != R_DI))
2443                     return NULL;
2444
2445                 /* ensure the user didn't specify DWORD/QWORD */
2446                 if (input->disp_size == 32 || input->disp_size == 64)
2447                     return NULL;
2448
2449                 if (s != 1 && i != -1)
2450                     return NULL;        /* no can do, in 16-bit EA */
2451                 if (b == -1 && i != -1) {
2452                     int tmp = b;
2453                     b = i;
2454                     i = tmp;
2455                 }               /* swap */
2456                 if ((b == R_SI || b == R_DI) && i != -1) {
2457                     int tmp = b;
2458                     b = i;
2459                     i = tmp;
2460                 }
2461                 /* have BX/BP as base, SI/DI index */
2462                 if (b == i)
2463                     return NULL;        /* shouldn't ever happen, in theory */
2464                 if (i != -1 && b != -1 &&
2465                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2466                     return NULL;        /* invalid combinations */
2467                 if (b == -1)    /* pure offset: handled above */
2468                     return NULL;        /* so if it gets to here, panic! */
2469
2470                 rm = -1;
2471                 if (i != -1)
2472                     switch (i * 256 + b) {
2473                     case R_SI * 256 + R_BX:
2474                         rm = 0;
2475                         break;
2476                     case R_DI * 256 + R_BX:
2477                         rm = 1;
2478                         break;
2479                     case R_SI * 256 + R_BP:
2480                         rm = 2;
2481                         break;
2482                     case R_DI * 256 + R_BP:
2483                         rm = 3;
2484                         break;
2485                 } else
2486                     switch (b) {
2487                     case R_SI:
2488                         rm = 4;
2489                         break;
2490                     case R_DI:
2491                         rm = 5;
2492                         break;
2493                     case R_BP:
2494                         rm = 6;
2495                         break;
2496                     case R_BX:
2497                         rm = 7;
2498                         break;
2499                     }
2500                 if (rm == -1)   /* can't happen, in theory */
2501                     return NULL;        /* so panic if it does */
2502
2503                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2504                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2505                     mod = 0;
2506                 else if (input->eaflags & EAF_BYTEOFFS ||
2507                          (o >= -128 && o <= 127 && seg == NO_SEG
2508                           && !forw_ref
2509                           && !(input->eaflags & EAF_WORDOFFS)))
2510                     mod = 1;
2511                 else
2512                     mod = 2;
2513
2514                 output->sib_present = false;    /* no SIB - it's 16-bit */
2515                 output->bytes = mod;    /* bytes of offset needed */
2516                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2517             }
2518         }
2519     }
2520
2521     output->size = 1 + output->sib_present + output->bytes;
2522     return output;
2523 }
2524
2525 static void add_asp(insn *ins, int addrbits)
2526 {
2527     int j, valid;
2528     int defdisp;
2529
2530     valid = (addrbits == 64) ? 64|32 : 32|16;
2531
2532     switch (ins->prefixes[PPS_ASIZE]) {
2533     case P_A16:
2534         valid &= 16;
2535         break;
2536     case P_A32:
2537         valid &= 32;
2538         break;
2539     case P_A64:
2540         valid &= 64;
2541         break;
2542     case P_ASP:
2543         valid &= (addrbits == 32) ? 16 : 32;
2544         break;
2545     default:
2546         break;
2547     }
2548
2549     for (j = 0; j < ins->operands; j++) {
2550         if (is_class(MEMORY, ins->oprs[j].type)) {
2551             opflags_t i, b;
2552
2553             /* Verify as Register */
2554             if (ins->oprs[j].indexreg < EXPR_REG_START
2555                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2556                 i = 0;
2557             else
2558                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2559
2560             /* Verify as Register */
2561             if (ins->oprs[j].basereg < EXPR_REG_START
2562                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2563                 b = 0;
2564             else
2565                 b = nasm_reg_flags[ins->oprs[j].basereg];
2566
2567             if (ins->oprs[j].scale == 0)
2568                 i = 0;
2569
2570             if (!i && !b) {
2571                 int ds = ins->oprs[j].disp_size;
2572                 if ((addrbits != 64 && ds > 8) ||
2573                     (addrbits == 64 && ds == 16))
2574                     valid &= ds;
2575             } else {
2576                 if (!(REG16 & ~b))
2577                     valid &= 16;
2578                 if (!(REG32 & ~b))
2579                     valid &= 32;
2580                 if (!(REG64 & ~b))
2581                     valid &= 64;
2582
2583                 if (!(REG16 & ~i))
2584                     valid &= 16;
2585                 if (!(REG32 & ~i))
2586                     valid &= 32;
2587                 if (!(REG64 & ~i))
2588                     valid &= 64;
2589             }
2590         }
2591     }
2592
2593     if (valid & addrbits) {
2594         ins->addr_size = addrbits;
2595     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2596         /* Add an address size prefix */
2597         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2598         ins->prefixes[PPS_ASIZE] = pref;
2599         ins->addr_size = (addrbits == 32) ? 16 : 32;
2600     } else {
2601         /* Impossible... */
2602         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2603         ins->addr_size = addrbits; /* Error recovery */
2604     }
2605
2606     defdisp = ins->addr_size == 16 ? 16 : 32;
2607
2608     for (j = 0; j < ins->operands; j++) {
2609         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2610             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2611             != ins->addr_size) {
2612             /* mem_offs sizes must match the address size; if not,
2613                strip the MEM_OFFS bit and match only EA instructions */
2614             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2615         }
2616     }
2617 }