assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2010 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 0ww lpp
  96  *                 [w0] ww = 0 for W = 0
  97  *                 [w1] ww = 1 for W = 1
  98  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  99  *                 [ww] ww = 3 for W used as REX.W
 100  *
 101  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 102  *
 103  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 104  *                 which is to be extended to the operand size.
 105  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 106  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 107  * \312          - (disassembler only) invalid with non-default address size.
 108  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 109  * \314          - (disassembler only) invalid with REX.B
 110  * \315          - (disassembler only) invalid with REX.X
 111  * \316          - (disassembler only) invalid with REX.R
 112  * \317          - (disassembler only) invalid with REX.W
 113  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 114  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 115  * \322          - indicates that this instruction is only valid when the
 116  *                 operand size is the default (instruction to disassembler,
 117  *                 generates no code in the assembler)
 118  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 119  * \324          - indicates 64-bit operand size requiring REX prefix.
 120  * \325          - instruction which always uses spl/bpl/sil/dil
 121  * \330          - a literal byte follows in the code stream, to be added
 122  *                 to the condition code value of the instruction.
 123  * \331          - instruction not valid with REP prefix.  Hint for
 124  *                 disassembler only; for SSE instructions.
 125  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 126  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 127  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 128  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  129  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  130  * \337          - force a REPNE prefix (0xF2) even if not specified.
 131  *                 \336-\337 are still listed as prefixes in the disassembler.
 132  * \340          - reserve <operand 0> bytes of uninitialized storage.
 133  *                 Operand 0 had better be a segmentless constant.
 134  * \341          - this instruction needs a WAIT "prefix"
 135  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 136  *                 (POP is never used for CS) depending on operand 0
 137  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 138  *                 on operand 0
 139  * \360          - no SSE prefix (== \364\331)
 140  * \361          - 66 SSE prefix (== \366\331)
 141  * \362          - F2 SSE prefix (== \364\332)
 142  * \363          - F3 SSE prefix (== \364\333)
 143  * \364          - operand-size prefix (0x66) not permitted
 144  * \365          - address-size prefix (0x67) not permitted
 145  * \366          - operand-size prefix (0x66) used as opcode extension
 146  * \367          - address-size prefix (0x67) used as opcode extension
 147  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 148  *                 370 is used for Jcc, 371 is used for JMP.
 149  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 150  *                 used for conditional jump over longer jump
 151  */
 152
 153 #include "compiler.h"
 154
 155 #include <stdio.h>
 156 #include <string.h>
 157 #include <inttypes.h>
 158
 159 #include "nasm.h"
 160 #include "nasmlib.h"
 161 #include "assemble.h"
 162 #include "insns.h"
 163 #include "tables.h"
 164
 165 enum match_result {
 166     /*
 167      * Matching errors.  These should be sorted so that more specific
 168      * errors come later in the sequence.
 169      */
 170     MERR_INVALOP,
 171     MERR_OPSIZEMISSING,
 172     MERR_OPSIZEMISMATCH,
 173     MERR_BADCPU,
 174     MERR_BADMODE,
 175     /*
 176      * Matching success; the conditional ones first
 177      */
 178     MOK_JUMP,   /* Matching OK but needs jmp_match() */
 179     MOK_GOOD    /* Matching unconditionally OK */
 180 };
 181
 182 typedef struct {
 183     int sib_present;                 /* is a SIB byte necessary? */
 184     int bytes;                       /* # of bytes of offset needed */
 185     int size;                        /* lazy - this is sib+bytes+1 */
 186     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 187 } ea;
 188
 189 static uint32_t cpu;            /* cpu level received from nasm.c */
 190 static efunc errfunc;
 191 static struct ofmt *outfmt;
 192 static ListGen *list;
 193
 194 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 195 static void gencode(int32_t segment, int64_t offset, int bits,
 196                     insn * ins, const struct itemplate *temp,
 197                     int64_t insn_end);
 198 static enum match_result find_match(const struct itemplate **tempp,
 199                                     insn *instruction,
 200                                     int32_t segment, int64_t offset, int bits);
 201 static enum match_result matches(const struct itemplate *, insn *, int bits);
 202 static opflags_t regflag(const operand *);
 203 static int32_t regval(const operand *);
 204 static int rexflags(int, opflags_t, int);
 205 static int op_rexflags(const operand *, int);
 206 static ea *process_ea(operand *, ea *, int, int, int, opflags_t);
 207 static void add_asp(insn *, int);
 208
 209 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 210 {
 211     return ins->prefixes[pos] == prefix;
 212 }
 213
 214 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 215 {
 216     if (ins->prefixes[pos])
 217         errfunc(ERR_NONFATAL, "invalid %s prefix",
 218                 prefix_name(ins->prefixes[pos]));
 219 }
 220
 221 static const char *size_name(int size)
 222 {
 223     switch (size) {
 224     case 1:
 225         return "byte";
 226     case 2:
 227         return "word";
 228     case 4:
 229         return "dword";
 230     case 8:
 231         return "qword";
 232     case 10:
 233         return "tword";
 234     case 16:
 235         return "oword";
 236     case 32:
 237         return "yword";
 238     default:
 239         return "???";
 240     }
 241 }
 242
 243 static void warn_overflow(int pass, int size)
 244 {
 245     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 246             "%s data exceeds bounds", size_name(size));
 247 }
 248
 249 static void warn_overflow_const(int64_t data, int size)
 250 {
 251     if (overflow_general(data, size))
 252         warn_overflow(ERR_PASS1, size);
 253 }
 254
 255 static void warn_overflow_opd(const struct operand *o, int size)
 256 {
 257     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 258         if (overflow_general(o->offset, size))
 259             warn_overflow(ERR_PASS2, size);
 260     }
 261 }
 262
 263 /*
 264  * This routine wrappers the real output format's output routine,
 265  * in order to pass a copy of the data off to the listing file
 266  * generator at the same time.
 267  */
 268 static void out(int64_t offset, int32_t segto, const void *data,
 269                 enum out_type type, uint64_t size,
 270                 int32_t segment, int32_t wrt)
 271 {
 272     static int32_t lineno = 0;     /* static!!! */
 273     static char *lnfname = NULL;
 274     uint8_t p[8];
 275
 276     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 277         /*
 278          * This is a non-relocated address, and we're going to
 279          * convert it into RAWDATA format.
 280          */
 281         uint8_t *q = p;
 282
 283         if (size > 8) {
 284             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 285             return;
 286         }
 287
 288         WRITEADDR(q, *(int64_t *)data, size);
 289         data = p;
 290         type = OUT_RAWDATA;
 291     }
 292
 293     list->output(offset, data, type, size);
 294
 295     /*
 296      * this call to src_get determines when we call the
 297      * debug-format-specific "linenum" function
 298      * it updates lineno and lnfname to the current values
 299      * returning 0 if "same as last time", -2 if lnfname
 300      * changed, and the amount by which lineno changed,
 301      * if it did. thus, these variables must be static
 302      */
 303
 304     if (src_get(&lineno, &lnfname))
 305         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 306
 307     outfmt->output(segto, data, type, size, segment, wrt);
 308 }
 309
 310 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 311                      insn * ins, const uint8_t *code)
 312 {
 313     int64_t isize;
 314     uint8_t c = code[0];
 315
 316     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 317         return false;
 318     if (!optimizing)
 319         return false;
 320     if (optimizing < 0 && c == 0371)
 321         return false;
 322
 323     isize = calcsize(segment, offset, bits, ins, code);
 324
 325     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 326         /* Be optimistic in pass 1 */
 327         return true;
 328
 329     if (ins->oprs[0].segment != segment)
 330         return false;
 331
 332     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 333     return (isize >= -128 && isize <= 127); /* is it byte size? */
 334 }
 335
 336 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 337                  insn * instruction, struct ofmt *output, efunc error,
 338                  ListGen * listgen)
 339 {
 340     const struct itemplate *temp;
 341     int j;
 342     enum match_result m;
 343     int64_t insn_end;
 344     int32_t itimes;
 345     int64_t start = offset;
 346     int64_t wsize;              /* size for DB etc. */
 347
 348     errfunc = error;            /* to pass to other functions */
 349     cpu = cp;
 350     outfmt = output;            /* likewise */
 351     list = listgen;             /* and again */
 352
 353     wsize = idata_bytes(instruction->opcode);
 354     if (wsize == -1)
 355         return 0;
 356
 357     if (wsize) {
 358         extop *e;
 359         int32_t t = instruction->times;
 360         if (t < 0)
 361             errfunc(ERR_PANIC,
 362                     "instruction->times < 0 (%ld) in assemble()", t);
 363
 364         while (t--) {           /* repeat TIMES times */
 365             list_for_each(e, instruction->eops) {
 366                 if (e->type == EOT_DB_NUMBER) {
 367                     if (wsize > 8) {
 368                         errfunc(ERR_NONFATAL,
 369                                 "integer supplied to a DT, DO or DY"
 370                                 " instruction");
 371                     } else {
 372                         out(offset, segment, &e->offset,
 373                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 374                         offset += wsize;
 375                     }
 376                 } else if (e->type == EOT_DB_STRING ||
 377                            e->type == EOT_DB_STRING_FREE) {
 378                     int align;
 379
 380                     out(offset, segment, e->stringval,
 381                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 382                     align = e->stringlen % wsize;
 383
 384                     if (align) {
 385                         align = wsize - align;
 386                         out(offset, segment, zero_buffer,
 387                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 388                     }
 389                     offset += e->stringlen + align;
 390                 }
 391             }
 392             if (t > 0 && t == instruction->times - 1) {
 393                 /*
 394                  * Dummy call to list->output to give the offset to the
 395                  * listing module.
 396                  */
 397                 list->output(offset, NULL, OUT_RAWDATA, 0);
 398                 list->uplevel(LIST_TIMES);
 399             }
 400         }
 401         if (instruction->times > 1)
 402             list->downlevel(LIST_TIMES);
 403         return offset - start;
 404     }
 405
 406     if (instruction->opcode == I_INCBIN) {
 407         const char *fname = instruction->eops->stringval;
 408         FILE *fp;
 409
 410         fp = fopen(fname, "rb");
 411         if (!fp) {
 412             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 413                   fname);
 414         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 415             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 416                   fname);
 417         } else {
 418             static char buf[4096];
 419             size_t t = instruction->times;
 420             size_t base = 0;
 421             size_t len;
 422
 423             len = ftell(fp);
 424             if (instruction->eops->next) {
 425                 base = instruction->eops->next->offset;
 426                 len -= base;
 427                 if (instruction->eops->next->next &&
 428                     len > (size_t)instruction->eops->next->next->offset)
 429                     len = (size_t)instruction->eops->next->next->offset;
 430             }
 431             /*
 432              * Dummy call to list->output to give the offset to the
 433              * listing module.
 434              */
 435             list->output(offset, NULL, OUT_RAWDATA, 0);
 436             list->uplevel(LIST_INCBIN);
 437             while (t--) {
 438                 size_t l;
 439
 440                 fseek(fp, base, SEEK_SET);
 441                 l = len;
 442                 while (l > 0) {
 443                     int32_t m;
 444                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 445                     if (!m) {
 446                         /*
 447                          * This shouldn't happen unless the file
 448                          * actually changes while we are reading
 449                          * it.
 450                          */
 451                         error(ERR_NONFATAL,
 452                               "`incbin': unexpected EOF while"
 453                               " reading file `%s'", fname);
 454                         t = 0;  /* Try to exit cleanly */
 455                         break;
 456                     }
 457                     out(offset, segment, buf, OUT_RAWDATA, m,
 458                         NO_SEG, NO_SEG);
 459                     l -= m;
 460                 }
 461             }
 462             list->downlevel(LIST_INCBIN);
 463             if (instruction->times > 1) {
 464                 /*
 465                  * Dummy call to list->output to give the offset to the
 466                  * listing module.
 467                  */
 468                 list->output(offset, NULL, OUT_RAWDATA, 0);
 469                 list->uplevel(LIST_TIMES);
 470                 list->downlevel(LIST_TIMES);
 471             }
 472             fclose(fp);
 473             return instruction->times * len;
 474         }
 475         return 0;               /* if we're here, there's an error */
 476     }
 477
 478     /* Check to see if we need an address-size prefix */
 479     add_asp(instruction, bits);
 480
 481     m = find_match(&temp, instruction, segment, offset, bits);
 482
 483     if (m == MOK_GOOD) {
 484         /* Matches! */
 485         int64_t insn_size = calcsize(segment, offset, bits,
 486                                      instruction, temp->code);
 487         itimes = instruction->times;
 488         if (insn_size < 0)  /* shouldn't be, on pass two */
 489             error(ERR_PANIC, "errors made it through from pass one");
 490         else
 491             while (itimes--) {
 492                 for (j = 0; j < MAXPREFIX; j++) {
 493                     uint8_t c = 0;
 494                     switch (instruction->prefixes[j]) {
 495                     case P_WAIT:
 496                         c = 0x9B;
 497                         break;
 498                     case P_LOCK:
 499                         c = 0xF0;
 500                         break;
 501                     case P_REPNE:
 502                     case P_REPNZ:
 503                         c = 0xF2;
 504                         break;
 505                     case P_REPE:
 506                     case P_REPZ:
 507                     case P_REP:
 508                         c = 0xF3;
 509                         break;
 510                     case R_CS:
 511                         if (bits == 64) {
 512                             error(ERR_WARNING | ERR_PASS2,
 513                                   "cs segment base generated, but will be ignored in 64-bit mode");
 514                         }
 515                         c = 0x2E;
 516                         break;
 517                     case R_DS:
 518                         if (bits == 64) {
 519                             error(ERR_WARNING | ERR_PASS2,
 520                                   "ds segment base generated, but will be ignored in 64-bit mode");
 521                         }
 522                         c = 0x3E;
 523                         break;
 524                     case R_ES:
 525                         if (bits == 64) {
 526                             error(ERR_WARNING | ERR_PASS2,
 527                                   "es segment base generated, but will be ignored in 64-bit mode");
 528                         }
 529                         c = 0x26;
 530                         break;
 531                     case R_FS:
 532                         c = 0x64;
 533                         break;
 534                     case R_GS:
 535                         c = 0x65;
 536                         break;
 537                     case R_SS:
 538                         if (bits == 64) {
 539                             error(ERR_WARNING | ERR_PASS2,
 540                                   "ss segment base generated, but will be ignored in 64-bit mode");
 541                         }
 542                         c = 0x36;
 543                         break;
 544                     case R_SEGR6:
 545                     case R_SEGR7:
 546                         error(ERR_NONFATAL,
 547                               "segr6 and segr7 cannot be used as prefixes");
 548                         break;
 549                     case P_A16:
 550                         if (bits == 64) {
 551                             error(ERR_NONFATAL,
 552                                   "16-bit addressing is not supported "
 553                                   "in 64-bit mode");
 554                         } else if (bits != 16)
 555                             c = 0x67;
 556                         break;
 557                     case P_A32:
 558                         if (bits != 32)
 559                             c = 0x67;
 560                         break;
 561                     case P_A64:
 562                         if (bits != 64) {
 563                             error(ERR_NONFATAL,
 564                                   "64-bit addressing is only supported "
 565                                   "in 64-bit mode");
 566                         }
 567                         break;
 568                     case P_ASP:
 569                         c = 0x67;
 570                         break;
 571                     case P_O16:
 572                         if (bits != 16)
 573                             c = 0x66;
 574                         break;
 575                     case P_O32:
 576                         if (bits == 16)
 577                             c = 0x66;
 578                         break;
 579                     case P_O64:
 580                         /* REX.W */
 581                         break;
 582                     case P_OSP:
 583                         c = 0x66;
 584                         break;
 585                     case P_none:
 586                         break;
 587                     default:
 588                         error(ERR_PANIC, "invalid instruction prefix");
 589                     }
 590                     if (c != 0) {
 591                         out(offset, segment, &c, OUT_RAWDATA, 1,
 592                             NO_SEG, NO_SEG);
 593                         offset++;
 594                     }
 595                 }
 596                 insn_end = offset + insn_size;
 597                 gencode(segment, offset, bits, instruction,
 598                         temp, insn_end);
 599                 offset += insn_size;
 600                 if (itimes > 0 && itimes == instruction->times - 1) {
 601                     /*
 602                      * Dummy call to list->output to give the offset to the
 603                      * listing module.
 604                      */
 605                     list->output(offset, NULL, OUT_RAWDATA, 0);
 606                     list->uplevel(LIST_TIMES);
 607                 }
 608             }
 609         if (instruction->times > 1)
 610             list->downlevel(LIST_TIMES);
 611         return offset - start;
 612     } else {
 613         /* No match */
 614         switch (m) {
 615         case MERR_OPSIZEMISSING:
 616             error(ERR_NONFATAL, "operation size not specified");
 617             break;
 618         case MERR_OPSIZEMISMATCH:
 619             error(ERR_NONFATAL, "mismatch in operand sizes");
 620             break;
 621         case MERR_BADCPU:
 622             error(ERR_NONFATAL, "no instruction for this cpu level");
 623             break;
 624         case MERR_BADMODE:
 625             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 626                   bits);
 627             break;
 628         default:
 629             error(ERR_NONFATAL,
 630                   "invalid combination of opcode and operands");
 631             break;
 632         }
 633     }
 634     return 0;
 635 }
 636
 637 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 638                   insn * instruction, efunc error)
 639 {
 640     const struct itemplate *temp;
 641     enum match_result m;
 642
 643     errfunc = error;            /* to pass to other functions */
 644     cpu = cp;
 645
 646     if (instruction->opcode == I_none)
 647         return 0;
 648
 649     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 650         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 651         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 652         instruction->opcode == I_DY) {
 653         extop *e;
 654         int32_t isize, osize, wsize;
 655
 656         isize = 0;
 657         wsize = idata_bytes(instruction->opcode);
 658
 659         list_for_each(e, instruction->eops) {
 660             int32_t align;
 661
 662             osize = 0;
 663             if (e->type == EOT_DB_NUMBER) {
 664                 osize = 1;
 665                 warn_overflow_const(e->offset, wsize);
 666             } else if (e->type == EOT_DB_STRING ||
 667                        e->type == EOT_DB_STRING_FREE)
 668                 osize = e->stringlen;
 669
 670             align = (-osize) % wsize;
 671             if (align < 0)
 672                 align += wsize;
 673             isize += osize + align;
 674         }
 675         return isize * instruction->times;
 676     }
 677
 678     if (instruction->opcode == I_INCBIN) {
 679         const char *fname = instruction->eops->stringval;
 680         FILE *fp;
 681         int64_t val = 0;
 682         size_t len;
 683
 684         fp = fopen(fname, "rb");
 685         if (!fp)
 686             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 687                   fname);
 688         else if (fseek(fp, 0L, SEEK_END) < 0)
 689             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 690                   fname);
 691         else {
 692             len = ftell(fp);
 693             if (instruction->eops->next) {
 694                 len -= instruction->eops->next->offset;
 695                 if (instruction->eops->next->next &&
 696                     len > (size_t)instruction->eops->next->next->offset) {
 697                     len = (size_t)instruction->eops->next->next->offset;
 698                 }
 699             }
 700             val = instruction->times * len;
 701         }
 702         if (fp)
 703             fclose(fp);
 704         return val;
 705     }
 706
 707     /* Check to see if we need an address-size prefix */
 708     add_asp(instruction, bits);
 709
 710     m = find_match(&temp, instruction, segment, offset, bits);
 711     if (m == MOK_GOOD) {
 712         /* we've matched an instruction. */
 713         int64_t isize;
 714         const uint8_t *codes = temp->code;
 715         int j;
 716
 717         isize = calcsize(segment, offset, bits, instruction, codes);
 718         if (isize < 0)
 719             return -1;
 720         for (j = 0; j < MAXPREFIX; j++) {
 721             switch (instruction->prefixes[j]) {
 722             case P_A16:
 723                 if (bits != 16)
 724                     isize++;
 725                 break;
 726             case P_A32:
 727                 if (bits != 32)
 728                     isize++;
 729                 break;
 730             case P_O16:
 731                 if (bits != 16)
 732                     isize++;
 733                 break;
 734             case P_O32:
 735                 if (bits == 16)
 736                     isize++;
 737                 break;
 738             case P_A64:
 739             case P_O64:
 740             case P_none:
 741                 break;
 742             default:
 743                 isize++;
 744                 break;
 745             }
 746         }
 747         return isize * instruction->times;
 748     } else {
 749         return -1;                  /* didn't match any instruction */
 750     }
 751 }
 752
 753 static bool possible_sbyte(operand *o)
 754 {
 755     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 756         !(o->opflags & OPFLAG_UNKNOWN) &&
 757         optimizing >= 0 && !(o->type & STRICT);
 758 }
 759
 760 /* check that opn[op]  is a signed byte of size 16 or 32 */
 761 static bool is_sbyte16(operand *o)
 762 {
 763     int16_t v;
 764
 765     if (!possible_sbyte(o))
 766         return false;
 767
 768     v = o->offset;
 769     return v >= -128 && v <= 127;
 770 }
 771
 772 static bool is_sbyte32(operand *o)
 773 {
 774     int32_t v;
 775
 776     if (!possible_sbyte(o))
 777         return false;
 778
 779     v = o->offset;
 780     return v >= -128 && v <= 127;
 781 }
 782
 783 /* Common construct */
 784 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 785
 786 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 787                         insn * ins, const uint8_t *codes)
 788 {
 789     int64_t length = 0;
 790     uint8_t c;
 791     int rex_mask = ~0;
 792     int op1, op2;
 793     struct operand *opx;
 794     uint8_t opex = 0;
 795
 796     ins->rex = 0;               /* Ensure REX is reset */
 797
 798     if (ins->prefixes[PPS_OSIZE] == P_O64)
 799         ins->rex |= REX_W;
 800
 801     (void)segment;              /* Don't warn that this parameter is unused */
 802     (void)offset;               /* Don't warn that this parameter is unused */
 803
 804     while (*codes) {
 805         c = *codes++;
 806         op1 = (c & 3) + ((opex & 1) << 2);
 807         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 808         opx = &ins->oprs[op1];
 809         opex = 0;               /* For the next iteration */
 810
 811         switch (c) {
 812         case 01:
 813         case 02:
 814         case 03:
 815         case 04:
 816             codes += c, length += c;
 817             break;
 818
 819         case 05:
 820         case 06:
 821         case 07:
 822             opex = c;
 823             break;
 824
 825         case4(010):
 826             ins->rex |=
 827                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 828             codes++, length++;
 829             break;
 830
 831         case4(014):
 832         case4(020):
 833         case4(024):
 834             length++;
 835             break;
 836
 837         case4(030):
 838             length += 2;
 839             break;
 840
 841         case4(034):
 842             if (opx->type & (BITS16 | BITS32 | BITS64))
 843                 length += (opx->type & BITS16) ? 2 : 4;
 844             else
 845                 length += (bits == 16) ? 2 : 4;
 846             break;
 847
 848         case4(040):
 849             length += 4;
 850             break;
 851
 852         case4(044):
 853             length += ins->addr_size >> 3;
 854             break;
 855
 856         case4(050):
 857             length++;
 858             break;
 859
 860         case4(054):
 861             length += 8; /* MOV reg64/imm */
 862             break;
 863
 864         case4(060):
 865             length += 2;
 866             break;
 867
 868         case4(064):
 869             if (opx->type & (BITS16 | BITS32 | BITS64))
 870                 length += (opx->type & BITS16) ? 2 : 4;
 871             else
 872                 length += (bits == 16) ? 2 : 4;
 873             break;
 874
 875         case4(070):
 876             length += 4;
 877             break;
 878
 879         case4(074):
 880             length += 2;
 881             break;
 882
 883         case4(0140):
 884             length += is_sbyte16(opx) ? 1 : 2;
 885             break;
 886
 887         case4(0144):
 888             codes++;
 889             length++;
 890             break;
 891
 892         case4(0150):
 893             length += is_sbyte32(opx) ? 1 : 4;
 894             break;
 895
 896         case4(0154):
 897             codes++;
 898             length++;
 899             break;
 900
 901         case4(0160):
 902             length++;
 903             ins->rex |= REX_D;
 904             ins->drexdst = regval(opx);
 905             break;
 906
 907         case4(0164):
 908             length++;
 909             ins->rex |= REX_D|REX_OC;
 910             ins->drexdst = regval(opx);
 911             break;
 912
 913         case 0171:
 914             break;
 915
 916         case 0172:
 917         case 0173:
 918         case 0174:
 919             codes++;
 920             length++;
 921             break;
 922
 923         case4(0250):
 924             length += is_sbyte32(opx) ? 1 : 4;
 925             break;
 926
 927         case4(0254):
 928             length += 4;
 929             break;
 930
 931         case4(0260):
 932             ins->rex |= REX_V;
 933             ins->drexdst = regval(opx);
 934             ins->vex_cm = *codes++;
 935             ins->vex_wlp = *codes++;
 936             break;
 937
 938         case 0270:
 939             ins->rex |= REX_V;
 940             ins->drexdst = 0;
 941             ins->vex_cm = *codes++;
 942             ins->vex_wlp = *codes++;
 943             break;
 944
 945         case4(0274):
 946             length++;
 947             break;
 948
 949         case4(0300):
 950             break;
 951
 952         case 0310:
 953             if (bits == 64)
 954                 return -1;
 955             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 956             break;
 957
 958         case 0311:
 959             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 960             break;
 961
 962         case 0312:
 963             break;
 964
 965         case 0313:
 966             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 967                 has_prefix(ins, PPS_ASIZE, P_A32))
 968                 return -1;
 969             break;
 970
 971         case4(0314):
 972             break;
 973
 974         case 0320:
 975             length += (bits != 16);
 976             break;
 977
 978         case 0321:
 979             length += (bits == 16);
 980             break;
 981
 982         case 0322:
 983             break;
 984
 985         case 0323:
 986             rex_mask &= ~REX_W;
 987             break;
 988
 989         case 0324:
 990             ins->rex |= REX_W;
 991             break;
 992
 993         case 0325:
 994             ins->rex |= REX_NH;
 995             break;
 996
 997         case 0330:
 998             codes++, length++;
 999             break;
1000
1001         case 0331:
1002             break;
1003
1004         case 0332:
1005         case 0333:
1006             length++;
1007             break;
1008
1009         case 0334:
1010             ins->rex |= REX_L;
1011             break;
1012
1013         case 0335:
1014             break;
1015
1016         case 0336:
1017             if (!ins->prefixes[PPS_LREP])
1018                 ins->prefixes[PPS_LREP] = P_REP;
1019             break;
1020
1021         case 0337:
1022             if (!ins->prefixes[PPS_LREP])
1023                 ins->prefixes[PPS_LREP] = P_REPNE;
1024             break;
1025
1026         case 0340:
1027             if (ins->oprs[0].segment != NO_SEG)
1028                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1029                         " quantity of BSS space");
1030             else
1031                 length += ins->oprs[0].offset;
1032             break;
1033
1034         case 0341:
1035             if (!ins->prefixes[PPS_WAIT])
1036                 ins->prefixes[PPS_WAIT] = P_WAIT;
1037             break;
1038
1039         case4(0344):
1040             length++;
1041             break;
1042
1043         case 0360:
1044             break;
1045
1046         case 0361:
1047         case 0362:
1048         case 0363:
1049             length++;
1050             break;
1051
1052         case 0364:
1053         case 0365:
1054             break;
1055
1056         case 0366:
1057         case 0367:
1058             length++;
1059             break;
1060
1061         case 0370:
1062         case 0371:
1063         case 0372:
1064             break;
1065
1066         case 0373:
1067             length++;
1068             break;
1069
1070         case4(0100):
1071         case4(0110):
1072         case4(0120):
1073         case4(0130):
1074         case4(0200):
1075         case4(0204):
1076         case4(0210):
1077         case4(0214):
1078         case4(0220):
1079         case4(0224):
1080         case4(0230):
1081         case4(0234):
1082             {
1083                 ea ea_data;
1084                 int rfield;
1085                 opflags_t rflags;
1086                 struct operand *opy = &ins->oprs[op2];
1087
1088                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1089
1090                 if (c <= 0177) {
1091                     /* pick rfield from operand b (opx) */
1092                     rflags = regflag(opx);
1093                     rfield = nasm_regvals[opx->basereg];
1094                 } else {
1095                     rflags = 0;
1096                     rfield = c & 7;
1097                 }
1098                 if (!process_ea(opy, &ea_data, bits,
1099                                 ins->addr_size, rfield, rflags)) {
1100                     errfunc(ERR_NONFATAL, "invalid effective address");
1101                     return -1;
1102                 } else {
1103                     ins->rex |= ea_data.rex;
1104                     length += ea_data.size;
1105                 }
1106             }
1107             break;
1108
1109         default:
1110             errfunc(ERR_PANIC, "internal instruction table corrupt"
1111                     ": instruction code \\%o (0x%02X) given", c, c);
1112             break;
1113         }
1114     }
1115
1116     ins->rex &= rex_mask;
1117
1118     if (ins->rex & REX_NH) {
1119         if (ins->rex & REX_H) {
1120             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1121             return -1;
1122         }
1123         ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
1124     }
1125
1126     if (ins->rex & REX_V) {
1127         int bad32 = REX_R|REX_W|REX_X|REX_B;
1128
1129         if (ins->rex & REX_H) {
1130             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1131             return -1;
1132         }
1133         switch (ins->vex_wlp & 030) {
1134         case 000:
1135         case 020:
1136             ins->rex &= ~REX_W;
1137             break;
1138         case 010:
1139             ins->rex |= REX_W;
1140             bad32 &= ~REX_W;
1141             break;
1142         case 030:
1143             /* Follow REX_W */
1144             break;
1145         }
1146
1147         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1148             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1149             return -1;
1150         }
1151         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1152             length += 3;
1153         else
1154             length += 2;
1155     } else if (ins->rex & REX_D) {
1156         if (ins->rex & REX_H) {
1157             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1158             return -1;
1159         }
1160         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1161                            ins->drexdst > 7)) {
1162             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1163             return -1;
1164         }
1165         length++;
1166     } else if (ins->rex & REX_REAL) {
1167         if (ins->rex & REX_H) {
1168             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1169             return -1;
1170         } else if (bits == 64) {
1171             length++;
1172         } else if ((ins->rex & REX_L) &&
1173                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1174                    cpu >= IF_X86_64) {
1175             /* LOCK-as-REX.R */
1176             assert_no_prefix(ins, PPS_LREP);
1177             length++;
1178         } else {
1179             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1180             return -1;
1181         }
1182     }
1183
1184     return length;
1185 }
1186
/*
 * Emit a pending REX prefix byte, if one is needed.
 *
 * A byte is written only when ins->rex has neither REX_D nor REX_V set,
 * has at least one REX_REAL bit set, and bits == 64.  The byte written
 * is (ins->rex & REX_REAL) | REX_P.  Afterwards ins->rex is cleared, so
 * later invocations for the same instruction emit nothing, and offset
 * is advanced by one.
 *
 * Expects `ins', `bits', `offset', `segment' and out() to be in scope at
 * the point of use.  Wrapped in do { } while (0) so that `EMIT_REX();'
 * is a single statement and is safe in an unbraced if/else.
 */
#define EMIT_REX()                                                              \
    do {                                                                        \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&             \
            (bits == 64)) {                                                     \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                             \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG);    \
            ins->rex = 0;                                                       \
            offset += 1;                                                        \
        }                                                                       \
    } while (0)
1194
1195 static void gencode(int32_t segment, int64_t offset, int bits,
1196                     insn * ins, const struct itemplate *temp,
1197                     int64_t insn_end)
1198 {
1199     static char condval[] = {   /* conditional opcodes */
1200         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1201         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1202         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1203     };
1204     uint8_t c;
1205     uint8_t bytes[4];
1206     int64_t size;
1207     int64_t data;
1208     int op1, op2;
1209     struct operand *opx;
1210     const uint8_t *codes = temp->code;
1211     uint8_t opex = 0;
1212
1213     while (*codes) {
1214         c = *codes++;
1215         op1 = (c & 3) + ((opex & 1) << 2);
1216         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1217         opx = &ins->oprs[op1];
1218         opex = 0;                /* For the next iteration */
1219
1220         switch (c) {
1221         case 01:
1222         case 02:
1223         case 03:
1224         case 04:
1225             EMIT_REX();
1226             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1227             codes += c;
1228             offset += c;
1229             break;
1230
1231         case 05:
1232         case 06:
1233         case 07:
1234             opex = c;
1235             break;
1236
1237         case4(010):
1238             EMIT_REX();
1239             bytes[0] = *codes++ + (regval(opx) & 7);
1240             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1241             offset += 1;
1242             break;
1243
1244         case4(014):
1245             /*
1246              * The test for BITS8 and SBYTE here is intended to avoid
1247              * warning on optimizer actions due to SBYTE, while still
1248              * warn on explicit BYTE directives.  Also warn, obviously,
1249              * if the optimizer isn't enabled.
1250              */
1251             if (((opx->type & BITS8) ||
1252                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1253                 (opx->offset < -128 || opx->offset > 127)) {
1254                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1255                         "signed byte value exceeds bounds");
1256             }
1257             if (opx->segment != NO_SEG) {
1258                 data = opx->offset;
1259                 out(offset, segment, &data, OUT_ADDRESS, 1,
1260                     opx->segment, opx->wrt);
1261             } else {
1262                 bytes[0] = opx->offset;
1263                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1264                     NO_SEG);
1265             }
1266             offset += 1;
1267             break;
1268
1269         case4(020):
1270             if (opx->offset < -256 || opx->offset > 255) {
1271                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1272                         "byte value exceeds bounds");
1273             }
1274             if (opx->segment != NO_SEG) {
1275                 data = opx->offset;
1276                 out(offset, segment, &data, OUT_ADDRESS, 1,
1277                     opx->segment, opx->wrt);
1278             } else {
1279                 bytes[0] = opx->offset;
1280                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1281                     NO_SEG);
1282             }
1283             offset += 1;
1284             break;
1285
1286         case4(024):
1287             if (opx->offset < 0 || opx->offset > 255)
1288                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1289                         "unsigned byte value exceeds bounds");
1290             if (opx->segment != NO_SEG) {
1291                 data = opx->offset;
1292                 out(offset, segment, &data, OUT_ADDRESS, 1,
1293                     opx->segment, opx->wrt);
1294             } else {
1295                 bytes[0] = opx->offset;
1296                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1297                     NO_SEG);
1298             }
1299             offset += 1;
1300             break;
1301
1302         case4(030):
1303             warn_overflow_opd(opx, 2);
1304             data = opx->offset;
1305             out(offset, segment, &data, OUT_ADDRESS, 2,
1306                 opx->segment, opx->wrt);
1307             offset += 2;
1308             break;
1309
1310         case4(034):
1311             if (opx->type & (BITS16 | BITS32))
1312                 size = (opx->type & BITS16) ? 2 : 4;
1313             else
1314                 size = (bits == 16) ? 2 : 4;
1315             warn_overflow_opd(opx, size);
1316             data = opx->offset;
1317             out(offset, segment, &data, OUT_ADDRESS, size,
1318                 opx->segment, opx->wrt);
1319             offset += size;
1320             break;
1321
1322         case4(040):
1323             warn_overflow_opd(opx, 4);
1324             data = opx->offset;
1325             out(offset, segment, &data, OUT_ADDRESS, 4,
1326                 opx->segment, opx->wrt);
1327             offset += 4;
1328             break;
1329
1330         case4(044):
1331             data = opx->offset;
1332             size = ins->addr_size >> 3;
1333             warn_overflow_opd(opx, size);
1334             out(offset, segment, &data, OUT_ADDRESS, size,
1335                 opx->segment, opx->wrt);
1336             offset += size;
1337             break;
1338
1339         case4(050):
1340             if (opx->segment != segment) {
1341                 data = opx->offset;
1342                 out(offset, segment, &data,
1343                     OUT_REL1ADR, insn_end - offset,
1344                     opx->segment, opx->wrt);
1345             } else {
1346                 data = opx->offset - insn_end;
1347                 if (data > 127 || data < -128)
1348                     errfunc(ERR_NONFATAL, "short jump is out of range");
1349                 out(offset, segment, &data,
1350                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1351             }
1352             offset += 1;
1353             break;
1354
1355         case4(054):
1356             data = (int64_t)opx->offset;
1357             out(offset, segment, &data, OUT_ADDRESS, 8,
1358                 opx->segment, opx->wrt);
1359             offset += 8;
1360             break;
1361
1362         case4(060):
1363             if (opx->segment != segment) {
1364                 data = opx->offset;
1365                 out(offset, segment, &data,
1366                     OUT_REL2ADR, insn_end - offset,
1367                     opx->segment, opx->wrt);
1368             } else {
1369                 data = opx->offset - insn_end;
1370                 out(offset, segment, &data,
1371                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1372             }
1373             offset += 2;
1374             break;
1375
1376         case4(064):
1377             if (opx->type & (BITS16 | BITS32 | BITS64))
1378                 size = (opx->type & BITS16) ? 2 : 4;
1379             else
1380                 size = (bits == 16) ? 2 : 4;
1381             if (opx->segment != segment) {
1382                 data = opx->offset;
1383                 out(offset, segment, &data,
1384                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1385                     insn_end - offset, opx->segment, opx->wrt);
1386             } else {
1387                 data = opx->offset - insn_end;
1388                 out(offset, segment, &data,
1389                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1390             }
1391             offset += size;
1392             break;
1393
1394         case4(070):
1395             if (opx->segment != segment) {
1396                 data = opx->offset;
1397                 out(offset, segment, &data,
1398                     OUT_REL4ADR, insn_end - offset,
1399                     opx->segment, opx->wrt);
1400             } else {
1401                 data = opx->offset - insn_end;
1402                 out(offset, segment, &data,
1403                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1404             }
1405             offset += 4;
1406             break;
1407
1408         case4(074):
1409             if (opx->segment == NO_SEG)
1410                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1411                         " relocatable");
1412             data = 0;
1413             out(offset, segment, &data, OUT_ADDRESS, 2,
1414                 outfmt->segbase(1 + opx->segment),
1415                 opx->wrt);
1416             offset += 2;
1417             break;
1418
1419         case4(0140):
1420             data = opx->offset;
1421             warn_overflow_opd(opx, 2);
1422             if (is_sbyte16(opx)) {
1423                 bytes[0] = data;
1424                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1425                     NO_SEG);
1426                 offset++;
1427             } else {
1428                 out(offset, segment, &data, OUT_ADDRESS, 2,
1429                     opx->segment, opx->wrt);
1430                 offset += 2;
1431             }
1432             break;
1433
1434         case4(0144):
1435             EMIT_REX();
1436             bytes[0] = *codes++;
1437             if (is_sbyte16(opx))
1438                 bytes[0] |= 2;  /* s-bit */
1439             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1440             offset++;
1441             break;
1442
1443         case4(0150):
1444             data = opx->offset;
1445             warn_overflow_opd(opx, 4);
1446             if (is_sbyte32(opx)) {
1447                 bytes[0] = data;
1448                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1449                     NO_SEG);
1450                 offset++;
1451             } else {
1452                 out(offset, segment, &data, OUT_ADDRESS, 4,
1453                     opx->segment, opx->wrt);
1454                 offset += 4;
1455             }
1456             break;
1457
1458         case4(0154):
1459             EMIT_REX();
1460             bytes[0] = *codes++;
1461             if (is_sbyte32(opx))
1462                 bytes[0] |= 2;  /* s-bit */
1463             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1464             offset++;
1465             break;
1466
1467         case4(0160):
1468         case4(0164):
1469             break;
1470
1471         case 0171:
1472             bytes[0] =
1473                 (ins->drexdst << 4) |
1474                 (ins->rex & REX_OC ? 0x08 : 0) |
1475                 (ins->rex & (REX_R|REX_X|REX_B));
1476             ins->rex = 0;
1477             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1478             offset++;
1479             break;
1480
1481         case 0172:
1482             c = *codes++;
1483             opx = &ins->oprs[c >> 3];
1484             bytes[0] = nasm_regvals[opx->basereg] << 4;
1485             opx = &ins->oprs[c & 7];
1486             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1487                 errfunc(ERR_NONFATAL,
1488                         "non-absolute expression not permitted as argument %d",
1489                         c & 7);
1490             } else {
1491                 if (opx->offset & ~15) {
1492                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1493                             "four-bit argument exceeds bounds");
1494                 }
1495                 bytes[0] |= opx->offset & 15;
1496             }
1497             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1498             offset++;
1499             break;
1500
1501         case 0173:
1502             c = *codes++;
1503             opx = &ins->oprs[c >> 4];
1504             bytes[0] = nasm_regvals[opx->basereg] << 4;
1505             bytes[0] |= c & 15;
1506             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1507             offset++;
1508             break;
1509
1510         case 0174:
1511             c = *codes++;
1512             opx = &ins->oprs[c];
1513             bytes[0] = nasm_regvals[opx->basereg] << 4;
1514             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1515             offset++;
1516             break;
1517
1518         case4(0250):
1519             data = opx->offset;
1520             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1521                 (int32_t)data != (int64_t)data) {
1522                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1523                         "signed dword immediate exceeds bounds");
1524             }
1525             if (is_sbyte32(opx)) {
1526                 bytes[0] = data;
1527                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1528                     NO_SEG);
1529                 offset++;
1530             } else {
1531                 out(offset, segment, &data, OUT_ADDRESS, 4,
1532                     opx->segment, opx->wrt);
1533                 offset += 4;
1534             }
1535             break;
1536
1537         case4(0254):
1538             data = opx->offset;
1539             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1540                 (int32_t)data != (int64_t)data) {
1541                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1542                         "signed dword immediate exceeds bounds");
1543             }
1544             out(offset, segment, &data, OUT_ADDRESS, 4,
1545                 opx->segment, opx->wrt);
1546             offset += 4;
1547             break;
1548
1549         case4(0260):
1550         case 0270:
1551             codes += 2;
1552             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1553                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1554                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1555                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1556                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1557                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1558                 offset += 3;
1559             } else {
1560                 bytes[0] = 0xc5;
1561                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1562                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1563                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1564                 offset += 2;
1565             }
1566             break;
1567
1568         case4(0274):
1569         {
1570             uint64_t uv, um;
1571             int s;
1572
1573             if (ins->rex & REX_W)
1574                 s = 64;
1575             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1576                 s = 16;
1577             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1578                 s = 32;
1579             else
1580                 s = bits;
1581
1582             um = (uint64_t)2 << (s-1);
1583             uv = opx->offset;
1584
1585             if (uv > 127 && uv < (uint64_t)-128 &&
1586                 (uv < um-128 || uv > um-1)) {
1587                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1588                         "signed byte value exceeds bounds");
1589             }
1590             if (opx->segment != NO_SEG) {
1591                 data = uv;
1592                 out(offset, segment, &data, OUT_ADDRESS, 1,
1593                     opx->segment, opx->wrt);
1594             } else {
1595                 bytes[0] = uv;
1596                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1597                     NO_SEG);
1598             }
1599             offset += 1;
1600             break;
1601         }
1602
1603         case4(0300):
1604             break;
1605
1606         case 0310:
1607             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1608                 *bytes = 0x67;
1609                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1610                 offset += 1;
1611             } else
1612                 offset += 0;
1613             break;
1614
1615         case 0311:
1616             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1617                 *bytes = 0x67;
1618                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1619                 offset += 1;
1620             } else
1621                 offset += 0;
1622             break;
1623
1624         case 0312:
1625             break;
1626
1627         case 0313:
1628             ins->rex = 0;
1629             break;
1630
1631         case4(0314):
1632             break;
1633
1634         case 0320:
1635             if (bits != 16) {
1636                 *bytes = 0x66;
1637                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1638                 offset += 1;
1639             } else
1640                 offset += 0;
1641             break;
1642
1643         case 0321:
1644             if (bits == 16) {
1645                 *bytes = 0x66;
1646                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1647                 offset += 1;
1648             } else
1649                 offset += 0;
1650             break;
1651
1652         case 0322:
1653         case 0323:
1654             break;
1655
1656         case 0324:
1657             ins->rex |= REX_W;
1658             break;
1659
1660         case 0325:
1661             break;
1662
1663         case 0330:
1664             *bytes = *codes++ ^ condval[ins->condition];
1665             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1666             offset += 1;
1667             break;
1668
1669         case 0331:
1670             break;
1671
1672         case 0332:
1673         case 0333:
1674             *bytes = c - 0332 + 0xF2;
1675             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1676             offset += 1;
1677             break;
1678
1679         case 0334:
1680             if (ins->rex & REX_R) {
1681                 *bytes = 0xF0;
1682                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1683                 offset += 1;
1684             }
1685             ins->rex &= ~(REX_L|REX_R);
1686             break;
1687
1688         case 0335:
1689             break;
1690
1691         case 0336:
1692         case 0337:
1693             break;
1694
1695         case 0340:
1696             if (ins->oprs[0].segment != NO_SEG)
1697                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1698             else {
1699                 int64_t size = ins->oprs[0].offset;
1700                 if (size > 0)
1701                     out(offset, segment, NULL,
1702                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1703                 offset += size;
1704             }
1705             break;
1706
1707         case 0341:
1708             break;
1709
1710         case 0344:
1711         case 0345:
1712             bytes[0] = c & 1;
1713             switch (ins->oprs[0].basereg) {
1714             case R_CS:
1715                 bytes[0] += 0x0E;
1716                 break;
1717             case R_DS:
1718                 bytes[0] += 0x1E;
1719                 break;
1720             case R_ES:
1721                 bytes[0] += 0x06;
1722                 break;
1723             case R_SS:
1724                 bytes[0] += 0x16;
1725                 break;
1726             default:
1727                 errfunc(ERR_PANIC,
1728                         "bizarre 8086 segment register received");
1729             }
1730             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1731             offset++;
1732             break;
1733
1734         case 0346:
1735         case 0347:
1736             bytes[0] = c & 1;
1737             switch (ins->oprs[0].basereg) {
1738             case R_FS:
1739                 bytes[0] += 0xA0;
1740                 break;
1741             case R_GS:
1742                 bytes[0] += 0xA8;
1743                 break;
1744             default:
1745                 errfunc(ERR_PANIC,
1746                         "bizarre 386 segment register received");
1747             }
1748             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1749             offset++;
1750             break;
1751
1752         case 0360:
1753             break;
1754
1755         case 0361:
1756             bytes[0] = 0x66;
1757             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1758             offset += 1;
1759             break;
1760
1761         case 0362:
1762         case 0363:
1763             bytes[0] = c - 0362 + 0xf2;
1764             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1765             offset += 1;
1766             break;
1767
1768         case 0364:
1769         case 0365:
1770             break;
1771
1772         case 0366:
1773         case 0367:
1774             *bytes = c - 0366 + 0x66;
1775             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1776             offset += 1;
1777             break;
1778
1779         case 0370:
1780         case 0371:
1781         case 0372:
1782             break;
1783
1784         case 0373:
1785             *bytes = bits == 16 ? 3 : 5;
1786             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1787             offset += 1;
1788             break;
1789
1790         case4(0100):
1791         case4(0110):
1792         case4(0120):
1793         case4(0130):
1794         case4(0200):
1795         case4(0204):
1796         case4(0210):
1797         case4(0214):
1798         case4(0220):
1799         case4(0224):
1800         case4(0230):
1801         case4(0234):
1802             {
1803                 ea ea_data;
1804                 int rfield;
1805                 opflags_t rflags;
1806                 uint8_t *p;
1807                 int32_t s;
1808                 enum out_type type;
1809                 struct operand *opy = &ins->oprs[op2];
1810
1811                 if (c <= 0177) {
1812                     /* pick rfield from operand b (opx) */
1813                     rflags = regflag(opx);
1814                     rfield = nasm_regvals[opx->basereg];
1815                 } else {
1816                     /* rfield is constant */
1817                     rflags = 0;
1818                     rfield = c & 7;
1819                 }
1820
1821                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1822                                 rfield, rflags)) {
1823                     errfunc(ERR_NONFATAL, "invalid effective address");
1824                 }
1825
1826
1827                 p = bytes;
1828                 *p++ = ea_data.modrm;
1829                 if (ea_data.sib_present)
1830                     *p++ = ea_data.sib;
1831
1832                 /* DREX suffixes come between the SIB and the displacement */
1833                 if (ins->rex & REX_D) {
1834                     *p++ = (ins->drexdst << 4) |
1835                            (ins->rex & REX_OC ? 0x08 : 0) |
1836                            (ins->rex & (REX_R|REX_X|REX_B));
1837                     ins->rex = 0;
1838                 }
1839
1840                 s = p - bytes;
1841                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1842
1843                 /*
1844                  * Make sure the address gets the right offset in case
1845                  * the line breaks in the .lst file (BR 1197827)
1846                  */
1847                 offset += s;
1848                 s = 0;
1849
1850                 switch (ea_data.bytes) {
1851                 case 0:
1852                     break;
1853                 case 1:
1854                 case 2:
1855                 case 4:
1856                 case 8:
1857                     data = opy->offset;
1858                     s += ea_data.bytes;
1859                     if (ea_data.rip) {
1860                         if (opy->segment == segment) {
1861                             data -= insn_end;
1862                             if (overflow_signed(data, ea_data.bytes))
1863                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1864                             out(offset, segment, &data, OUT_ADDRESS,
1865                                 ea_data.bytes, NO_SEG, NO_SEG);
1866                         } else {
1867                             /* overflow check in output/linker? */
1868                             out(offset, segment, &data,        OUT_REL4ADR,
1869                                 insn_end - offset, opy->segment, opy->wrt);
1870                         }
1871                     } else {
1872                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1873                             signed_bits(opy->offset, ins->addr_size) !=
1874                             signed_bits(opy->offset, ea_data.bytes * 8))
1875                             warn_overflow(ERR_PASS2, ea_data.bytes);
1876
1877                         type = OUT_ADDRESS;
1878                         out(offset, segment, &data, OUT_ADDRESS,
1879                             ea_data.bytes, opy->segment, opy->wrt);
1880                     }
1881                     break;
1882                 default:
1883                     /* Impossible! */
1884                     errfunc(ERR_PANIC,
1885                             "Invalid amount of bytes (%d) for offset?!",
1886                             ea_data.bytes);
1887                     break;
1888                 }
1889                 offset += s;
1890             }
1891             break;
1892
1893         default:
1894             errfunc(ERR_PANIC, "internal instruction table corrupt"
1895                     ": instruction code \\%o (0x%02X) given", c, c);
1896             break;
1897         }
1898     }
1899 }
1900
1901 static opflags_t regflag(const operand * o)
1902 {
1903     if (!is_register(o->basereg))
1904         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1905     return nasm_reg_flags[o->basereg];
1906 }
1907
1908 static int32_t regval(const operand * o)
1909 {
1910     if (!is_register(o->basereg))
1911         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1912     return nasm_regvals[o->basereg];
1913 }
1914
1915 static int op_rexflags(const operand * o, int mask)
1916 {
1917     opflags_t flags;
1918     int val;
1919
1920     if (!is_register(o->basereg))
1921         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1922
1923     flags = nasm_reg_flags[o->basereg];
1924     val = nasm_regvals[o->basereg];
1925
1926     return rexflags(val, flags, mask);
1927 }
1928
1929 static int rexflags(int val, opflags_t flags, int mask)
1930 {
1931     int rex = 0;
1932
1933     if (val >= 8)
1934         rex |= REX_B|REX_X|REX_R;
1935     if (flags & BITS64)
1936         rex |= REX_W;
1937     if (!(REG_HIGH & ~flags))                   /* AH, CH, DH, BH */
1938         rex |= REX_H;
1939     else if (!(REG8 & ~flags) && val >= 4)      /* SPL, BPL, SIL, DIL */
1940         rex |= REX_P;
1941
1942     return rex & mask;
1943 }
1944
1945 static enum match_result find_match(const struct itemplate **tempp,
1946                                     insn *instruction,
1947                                     int32_t segment, int64_t offset, int bits)
1948 {
1949     const struct itemplate *temp;
1950     enum match_result m, merr;
1951     opflags_t xsizeflags[MAX_OPERANDS];
1952     bool opsizemissing = false;
1953     int i;
1954
1955     for (i = 0; i < instruction->operands; i++)
1956         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1957
1958     merr = MERR_INVALOP;
1959
1960     for (temp = nasm_instructions[instruction->opcode];
1961          temp->opcode != I_none; temp++) {
1962         m = matches(temp, instruction, bits);
1963         if (m == MOK_JUMP) {
1964             if (jmp_match(segment, offset, bits, instruction, temp->code))
1965                 m = MOK_GOOD;
1966             else
1967                 m = MERR_INVALOP;
1968         } else if (m == MERR_OPSIZEMISSING &&
1969                    (temp->flags & IF_SMASK) != IF_SX) {
1970             /*
1971              * Missing operand size and a candidate for fuzzy matching...
1972              */
1973             for (i = 0; i < temp->operands; i++) {
1974                 if ((temp->opd[i] & SAME_AS) == 0)
1975                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1976             }
1977             opsizemissing = true;
1978         }
1979         if (m > merr)
1980             merr = m;
1981         if (merr == MOK_GOOD)
1982             goto done;
1983     }
1984
1985     /* No match, but see if we can get a fuzzy operand size match... */
1986     if (!opsizemissing)
1987         goto done;
1988
1989     for (i = 0; i < instruction->operands; i++) {
1990         /*
1991          * We ignore extrinsic operand sizes on registers, so we should
1992          * never try to fuzzy-match on them.  This also resolves the case
1993          * when we have e.g. "xmmrm128" in two different positions.
1994          */
1995         if (is_class(REGISTER, instruction->oprs[i].type))
1996             continue;
1997
1998         /* This tests if xsizeflags[i] has more than one bit set */
1999         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2000             goto done;                /* No luck */
2001
2002         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2003     }
2004
2005     /* Try matching again... */
2006     for (temp = nasm_instructions[instruction->opcode];
2007          temp->opcode != I_none; temp++) {
2008         m = matches(temp, instruction, bits);
2009         if (m == MOK_JUMP) {
2010             if (jmp_match(segment, offset, bits, instruction, temp->code))
2011                 m = MOK_GOOD;
2012             else
2013                 m = MERR_INVALOP;
2014         }
2015         if (m > merr)
2016             merr = m;
2017         if (merr == MOK_GOOD)
2018             goto done;
2019     }
2020
2021 done:
2022     *tempp = temp;
2023     return merr;
2024 }
2025
2026 static enum match_result matches(const struct itemplate *itemp,
2027                                  insn *instruction, int bits)
2028 {
2029     int i, size[MAX_OPERANDS], asize, oprs;
2030     bool opsizemissing = false;
2031
2032     /*
2033      * Check the opcode
2034      */
2035     if (itemp->opcode != instruction->opcode)
2036         return MERR_INVALOP;
2037
2038     /*
2039      * Count the operands
2040      */
2041     if (itemp->operands != instruction->operands)
2042         return MERR_INVALOP;
2043
2044     /*
2045      * Check that no spurious colons or TOs are present
2046      */
2047     for (i = 0; i < itemp->operands; i++)
2048         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2049             return MERR_INVALOP;
2050
2051     /*
2052      * Process size flags
2053      */
2054     switch (itemp->flags & IF_SMASK) {
2055     case IF_SB:
2056         asize = BITS8;
2057         break;
2058     case IF_SW:
2059         asize = BITS16;
2060         break;
2061     case IF_SD:
2062         asize = BITS32;
2063         break;
2064     case IF_SQ:
2065         asize = BITS64;
2066         break;
2067     case IF_SO:
2068         asize = BITS128;
2069         break;
2070     case IF_SY:
2071         asize = BITS256;
2072         break;
2073     case IF_SZ:
2074         switch (bits) {
2075         case 16:
2076             asize = BITS16;
2077             break;
2078         case 32:
2079             asize = BITS32;
2080             break;
2081         case 64:
2082             asize = BITS64;
2083             break;
2084         default:
2085             asize = 0;
2086             break;
2087         }
2088         break;
2089     default:
2090         asize = 0;
2091         break;
2092     }
2093
2094     if (itemp->flags & IF_ARMASK) {
2095         /* S- flags only apply to a specific operand */
2096         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2097         memset(size, 0, sizeof size);
2098         size[i] = asize;
2099     } else {
2100         /* S- flags apply to all operands */
2101         for (i = 0; i < MAX_OPERANDS; i++)
2102             size[i] = asize;
2103     }
2104
2105     /*
2106      * Check that the operand flags all match up,
2107      * it's a bit tricky so lets be verbose:
2108      *
2109      * 1) Find out the size of operand. If instruction
2110      *    doesn't have one specified -- we're trying to
2111      *    guess it either from template (IF_S* flag) or
2112      *    from code bits.
2113      *
2114      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2115      *    (ie the same operand as was specified somewhere in template, and
2116      *    this referred operand index is being achieved via ~SAME_AS)
2117      *    we are to be sure that both registers (in template and instruction)
2118      *    do exactly match.
2119      *
2120      * 3) If template operand do not match the instruction OR
2121      *    template has an operand size specified AND this size differ
2122      *    from which instruction has (perhaps we got it from code bits)
2123      *    we are:
2124      *      a)  Check that only size of instruction and operand is differ
2125      *          other characteristics do match
2126      *      b)  Perhaps it's a register specified in instruction so
2127      *          for such a case we just mark that operand as "size
2128      *          missing" and this will turn on fuzzy operand size
2129      *          logic facility (handled by a caller)
2130      */
2131     for (i = 0; i < itemp->operands; i++) {
2132         opflags_t type = instruction->oprs[i].type;
2133         if (!(type & SIZE_MASK))
2134             type |= size[i];
2135
2136         if (itemp->opd[i] & SAME_AS) {
2137             int j = itemp->opd[i] & ~SAME_AS;
2138             if (type != instruction->oprs[j].type ||
2139                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2140                 return MERR_INVALOP;
2141         } else if (itemp->opd[i] & ~type ||
2142             ((itemp->opd[i] & SIZE_MASK) &&
2143              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2144             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2145                 return MERR_INVALOP;
2146             } else if (!is_class(REGISTER, type)) {
2147                 /*
2148                  * Note: we don't honor extrinsic operand sizes for registers,
2149                  * so "missing operand size" for a register should be
2150                  * considered a wildcard match rather than an error.
2151                  */
2152                 opsizemissing = true;
2153             }
2154         }
2155     }
2156
2157     if (opsizemissing)
2158         return MERR_OPSIZEMISSING;
2159
2160     /*
2161      * Check operand sizes
2162      */
2163     if (itemp->flags & (IF_SM | IF_SM2)) {
2164         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2165         for (i = 0; i < oprs; i++) {
2166             asize = itemp->opd[i] & SIZE_MASK;
2167             if (asize) {
2168                 for (i = 0; i < oprs; i++)
2169                     size[i] = asize;
2170                 break;
2171             }
2172         }
2173     } else {
2174         oprs = itemp->operands;
2175     }
2176
2177     for (i = 0; i < itemp->operands; i++) {
2178         if (!(itemp->opd[i] & SIZE_MASK) &&
2179             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2180             return MERR_OPSIZEMISMATCH;
2181     }
2182
2183     /*
2184      * Check template is okay at the set cpu level
2185      */
2186     if (((itemp->flags & IF_PLEVEL) > cpu))
2187         return MERR_BADCPU;
2188
2189     /*
2190      * Verify the appropriate long mode flag.
2191      */
2192     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2193         return MERR_BADMODE;
2194
2195     /*
2196      * Check if special handling needed for Jumps
2197      */
2198     if ((itemp->code[0] & 0374) == 0370)
2199         return MOK_JUMP;
2200
2201     return MOK_GOOD;
2202 }
2203
2204 static ea *process_ea(operand * input, ea * output, int bits,
2205                       int addrbits, int rfield, opflags_t rflags)
2206 {
         /*
          * Work out how `input' is encoded as an effective address and
          * record the result in `*output': the ModRM byte, whether a SIB
          * byte is present (and its value), the number of displacement
          * bytes, the RIP-relative flag and the total size.
          *
          * input    - operand to encode; in the 64-bit absolute-address
          *            case its type may be rewritten (IP_REL cleared,
          *            MEMORY set) after a warning
          * output   - receives the encoding; output->rex is only OR-ed
          *            into here and is never cleared by this function
          * bits     - current mode; only compared against 64 here
          * addrbits - address size (16, 32 or 64) the encoding must fit
          * rfield   - value for the ModRM reg field (low three bits used)
          * rflags   - register flags belonging to rfield, fed to rexflags()
          *
          * Returns `output' on success, or NULL when the operand cannot
          * be encoded as an effective address.
          */
2207     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2208
2209     output->rip = false;
2210
2211     /* REX flags for the rfield operand */
2212     output->rex |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
2213
2214     if (is_class(REGISTER, input->type)) {  /* register direct */
2215         int i;
2216         opflags_t f;
2217
2218         if (!is_register(input->basereg))
2219             return NULL;
2220         f = regflag(input);
2221         i = nasm_regvals[input->basereg];
2222
2223         if (REG_EA & ~f)
2224             return NULL;        /* Invalid EA register */
2225
2226         output->rex |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
2227
2228         output->sib_present = false;    /* no SIB necessary */
2229         output->bytes = 0;              /* no offset necessary either */
             /* mod = 3 (0xC0): the r/m field names a register directly */
2230         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2231     } else {                    /* it's a memory reference */
2232         if (input->basereg == -1 &&
2233             (input->indexreg == -1 || input->scale == 0)) {
2234             /* it's a pure offset */
2235
2236             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2237                 input->segment == NO_SEG) {
2238                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2239                 input->type &= ~IP_REL;
2240                 input->type |= MEMORY;
2241             }
2242
2243             if (input->eaflags & EAF_BYTEOFFS ||
2244                 (input->eaflags & EAF_WORDOFFS &&
2245                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2246                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2247             }
2248
                 /*
                  * 64-bit mode without IP_REL: emit a SIB byte with
                  * index = 4 and base = 5 followed by a 4-byte
                  * displacement.  Otherwise use the displacement-only
                  * ModRM form (r/m 5, or 6 for 16-bit addressing), which
                  * is flagged RIP-relative when bits == 64.
                  */
2249             if (bits == 64 && (~input->type & IP_REL)) {
2250                 int scale, index, base;
2251                 output->sib_present = true;
2252                 scale = 0;
2253                 index = 4;
2254                 base = 5;
2255                 output->sib = (scale << 6) | (index << 3) | base;
2256                 output->bytes = 4;
2257                 output->modrm = 4 | ((rfield & 7) << 3);
2258                 output->rip = false;
2259             } else {
2260                 output->sib_present = false;
2261                 output->bytes = (addrbits != 16 ? 4 : 2);
2262                 output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2263                 output->rip = bits == 64;
2264             }
2265         } else {                /* it's an indirection */
2266             int i = input->indexreg, b = input->basereg, s = input->scale;
2267             int32_t seg = input->segment;
2268             int hb = input->hintbase, ht = input->hinttype;
2269             int t, it, bt;              /* register numbers */
2270             opflags_t x, ix, bx;        /* register flags */
2271
2272             if (s == 0)
2273                 i = -1;         /* make this easy, at least */
2274
                 /* it/ix and bt/bx: number and flags of index and base, -1/0 if absent */
2275             if (is_register(i)) {
2276                 it = nasm_regvals[i];
2277                 ix = nasm_reg_flags[i];
2278             } else {
2279                 it = -1;
2280                 ix = 0;
2281             }
2282
2283             if (is_register(b)) {
2284                 bt = nasm_regvals[b];
2285                 bx = nasm_reg_flags[b];
2286             } else {
2287                 bt = -1;
2288                 bx = 0;
2289             }
2290
2291             /* check for a 32/64-bit memory reference... */
2292             if ((ix|bx) & (BITS32|BITS64)) {
2293                 /*
2294                  * it must be a 32/64-bit memory reference. Firstly we have
2295                  * to check that all registers involved are type E/Rxx.
2296                  */
                     /*
                      * sok: the address sizes (BITS32/BITS64) still
                      * compatible with the registers examined so far.
                      * NOTE(review): o takes input->offset as an int32_t;
                      * confirm the narrowing is intended for all callers.
                      */
2297                 int32_t sok = BITS32 | BITS64, o = input->offset;
2298
2299                 if (it != -1) {
2300                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2301                         sok &= ix;
2302                     else
2303                         return NULL;
2304                 }
2305
2306                 if (bt != -1) {
2307                     if (REG_GPR & ~bx)
2308                         return NULL; /* Invalid register */
2309                     if (~sok & bx & SIZE_MASK)
2310                         return NULL; /* Invalid size */
2311                     sok &= bx;
2312                 }
2313
2314                 /*
2315                  * While we're here, ensure the user didn't specify
2316                  * WORD or QWORD
2317                  */
2318                 if (input->disp_size == 16 || input->disp_size == 64)
2319                     return NULL;
2320
2321                 if (addrbits == 16 ||
2322                     (addrbits == 32 && !(sok & BITS32)) ||
2323                     (addrbits == 64 && !(sok & BITS64)))
2324                     return NULL;
2325
2326                 /* now reorganize base/index */
2327                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2328                     ((hb == b && ht == EAH_NOTBASE) ||
2329                      (hb == i && ht == EAH_MAKEBASE))) {
2330                     /* swap if hints say so */
2331                     t = bt, bt = it, it = t;
2332                     x = bx, bx = ix, ix = x;
2333                 }
2334                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2335                     bt = -1, bx = 0, s++;
2336                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2337                     /* make single reg base, unless hint */
2338                     bt = it, bx = ix, it = -1, ix = 0;
2339                 }
2340                 if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
2341                       s == 3 || s == 5 || s == 9) && bt == -1)
2342                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2343                 if (it == -1 && (bt & 7) != REG_NUM_ESP &&
2344                     (input->eaflags & EAF_TIMESTWO))
2345                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2346                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2347                 if (s == 1 && it == REG_NUM_ESP) {
2348                     /* swap ESP into base if scale is 1 */
2349                     t = it, it = bt, bt = t;
2350                     x = ix, ix = bx, bx = x;
2351                 }
2352                 if (it == REG_NUM_ESP ||
2353                     (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2354                     return NULL;        /* wrong, for various reasons */
2355
2356                 output->rex |= rexflags(it, ix, REX_X);
2357                 output->rex |= rexflags(bt, bx, REX_B);
2358
2359                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2360                     /* no SIB needed */
2361                     int mod, rm;
2362
2363                     if (bt == -1) {
2364                         rm = 5;
2365                         mod = 0;
2366                     } else {
2367                         rm = (bt & 7);
2368                         if (rm != REG_NUM_EBP && o == 0 &&
2369                             seg == NO_SEG && !forw_ref &&
2370                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2371                             mod = 0;
2372                         else if (input->eaflags & EAF_BYTEOFFS ||
2373                                  (o >= -128 && o <= 127 &&
2374                                   seg == NO_SEG && !forw_ref &&
2375                                   !(input->eaflags & EAF_WORDOFFS)))
2376                             mod = 1;
2377                         else
2378                             mod = 2;
2379                     }
2380
2381                     output->sib_present = false;
                         /* no base or mod 2: 4 displacement bytes; else mod (0 or 1) bytes */
2382                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2383                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2384                 } else {
2385                     /* we need a SIB */
2386                     int mod, scale, index, base;
2387
2388                     if (it == -1)
2389                         index = 4, s = 1;
2390                     else
2391                         index = (it & 7);
2392
                         /* turn the scale factor 1/2/4/8 into the 2-bit SIB scale field */
2393                     switch (s) {
2394                     case 1:
2395                         scale = 0;
2396                         break;
2397                     case 2:
2398                         scale = 1;
2399                         break;
2400                     case 4:
2401                         scale = 2;
2402                         break;
2403                     case 8:
2404                         scale = 3;
2405                         break;
2406                     default:   /* then what the smeg is it? */
2407                         return NULL;    /* panic */
2408                     }
2409
2410                     if (bt == -1) {
2411                         base = 5;
2412                         mod = 0;
2413                     } else {
2414                         base = (bt & 7);
2415                         if (base != REG_NUM_EBP && o == 0 &&
2416                             seg == NO_SEG && !forw_ref &&
2417                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2418                             mod = 0;
2419                         else if (input->eaflags & EAF_BYTEOFFS ||
2420                                  (o >= -128 && o <= 127 &&
2421                                   seg == NO_SEG && !forw_ref &&
2422                                   !(input->eaflags & EAF_WORDOFFS)))
2423                             mod = 1;
2424                         else
2425                             mod = 2;
2426                     }
2427
2428                     output->sib_present = true;
                         /* no base or mod 2: 4 displacement bytes; else mod (0 or 1) bytes */
2429                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2430                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2431                     output->sib = (scale << 6) | (index << 3) | base;
2432                 }
2433             } else {            /* it's 16-bit */
2434                 int mod, rm;
2435                 int16_t o = input->offset;
2436
2437                 /* check for 64-bit long mode */
2438                 if (addrbits == 64)
2439                     return NULL;
2440
2441                 /* check all registers are BX, BP, SI or DI */
2442                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
2443                     (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
2444                     return NULL;
2445
2446                 /* ensure the user didn't specify DWORD/QWORD */
2447                 if (input->disp_size == 32 || input->disp_size == 64)
2448                     return NULL;
2449
2450                 if (s != 1 && i != -1)
2451                     return NULL;        /* no can do, in 16-bit EA */
2452                 if (b == -1 && i != -1) {
2453                     int tmp = b;
2454                     b = i;
2455                     i = tmp;
2456                 }               /* swap */
2457                 if ((b == R_SI || b == R_DI) && i != -1) {
2458                     int tmp = b;
2459                     b = i;
2460                     i = tmp;
2461                 }
2462                 /* have BX/BP as base, SI/DI index */
2463                 if (b == i)
2464                     return NULL;        /* shouldn't ever happen, in theory */
2465                 if (i != -1 && b != -1 &&
2466                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2467                     return NULL;        /* invalid combinations */
2468                 if (b == -1)            /* pure offset: handled above */
2469                     return NULL;        /* so if it gets to here, panic! */
2470
2471                 rm = -1;
                     /* with an index, fold the index/base pair into one switch key */
2472                 if (i != -1)
2473                     switch (i * 256 + b) {
2474                     case R_SI * 256 + R_BX:
2475                         rm = 0;
2476                         break;
2477                     case R_DI * 256 + R_BX:
2478                         rm = 1;
2479                         break;
2480                     case R_SI * 256 + R_BP:
2481                         rm = 2;
2482                         break;
2483                     case R_DI * 256 + R_BP:
2484                         rm = 3;
2485                         break;
2486                 } else
2487                     switch (b) {
2488                     case R_SI:
2489                         rm = 4;
2490                         break;
2491                     case R_DI:
2492                         rm = 5;
2493                         break;
2494                     case R_BP:
2495                         rm = 6;
2496                         break;
2497                     case R_BX:
2498                         rm = 7;
2499                         break;
2500                     }
2501                 if (rm == -1)           /* can't happen, in theory */
2502                     return NULL;        /* so panic if it does */
2503
                     /* rm 6 (BP alone) is never given mod 0, so it always carries a displacement */
2504                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2505                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2506                     mod = 0;
2507                 else if (input->eaflags & EAF_BYTEOFFS ||
2508                          (o >= -128 && o <= 127 && seg == NO_SEG &&
2509                           !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
2510                     mod = 1;
2511                 else
2512                     mod = 2;
2513
2514                 output->sib_present = false;    /* no SIB - it's 16-bit */
2515                 output->bytes = mod;            /* bytes of offset needed */
2516                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2517             }
2518         }
2519     }
2520
         /* ModRM byte + optional SIB byte + displacement bytes */
2521     output->size = 1 + output->sib_present + output->bytes;
2522     return output;
2523 }
2524
2525 static void add_asp(insn *ins, int addrbits)
2526 {
2527     int j, valid;
2528     int defdisp;
2529
2530     valid = (addrbits == 64) ? 64|32 : 32|16;
2531
2532     switch (ins->prefixes[PPS_ASIZE]) {
2533     case P_A16:
2534         valid &= 16;
2535         break;
2536     case P_A32:
2537         valid &= 32;
2538         break;
2539     case P_A64:
2540         valid &= 64;
2541         break;
2542     case P_ASP:
2543         valid &= (addrbits == 32) ? 16 : 32;
2544         break;
2545     default:
2546         break;
2547     }
2548
2549     for (j = 0; j < ins->operands; j++) {
2550         if (is_class(MEMORY, ins->oprs[j].type)) {
2551             opflags_t i, b;
2552
2553             /* Verify as Register */
2554             if (!is_register(ins->oprs[j].indexreg))
2555                 i = 0;
2556             else
2557                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2558
2559             /* Verify as Register */
2560             if (!is_register(ins->oprs[j].basereg))
2561                 b = 0;
2562             else
2563                 b = nasm_reg_flags[ins->oprs[j].basereg];
2564
2565             if (ins->oprs[j].scale == 0)
2566                 i = 0;
2567
2568             if (!i && !b) {
2569                 int ds = ins->oprs[j].disp_size;
2570                 if ((addrbits != 64 && ds > 8) ||
2571                     (addrbits == 64 && ds == 16))
2572                     valid &= ds;
2573             } else {
2574                 if (!(REG16 & ~b))
2575                     valid &= 16;
2576                 if (!(REG32 & ~b))
2577                     valid &= 32;
2578                 if (!(REG64 & ~b))
2579                     valid &= 64;
2580
2581                 if (!(REG16 & ~i))
2582                     valid &= 16;
2583                 if (!(REG32 & ~i))
2584                     valid &= 32;
2585                 if (!(REG64 & ~i))
2586                     valid &= 64;
2587             }
2588         }
2589     }
2590
2591     if (valid & addrbits) {
2592         ins->addr_size = addrbits;
2593     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2594         /* Add an address size prefix */
2595         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2596         ins->prefixes[PPS_ASIZE] = pref;
2597         ins->addr_size = (addrbits == 32) ? 16 : 32;
2598     } else {
2599         /* Impossible... */
2600         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2601         ins->addr_size = addrbits; /* Error recovery */
2602     }
2603
2604     defdisp = ins->addr_size == 16 ? 16 : 32;
2605
2606     for (j = 0; j < ins->operands; j++) {
2607         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2608             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
2609             /*
2610              * mem_offs sizes must match the address size; if not,
2611              * strip the MEM_OFFS bit and match only EA instructions
2612              */
2613             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2614         }
2615     }
2616 }