assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2010 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 0ww lpp
  96  *                 [w0] ww = 0 for W = 0
  97  *                 [w1] ww = 1 for W = 1
  98  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  99  *                 [ww] ww = 3 for W used as REX.W
 100  *
 101  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 102  *
 103  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 104  *                 which is to be extended to the operand size.
 105  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 106  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 107  * \312          - (disassembler only) invalid with non-default address size.
 108  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 109  * \314          - (disassembler only) invalid with REX.B
 110  * \315          - (disassembler only) invalid with REX.X
 111  * \316          - (disassembler only) invalid with REX.R
 112  * \317          - (disassembler only) invalid with REX.W
 113  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 114  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 115  * \322          - indicates that this instruction is only valid when the
 116  *                 operand size is the default (instruction to disassembler,
 117  *                 generates no code in the assembler)
 118  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 119  * \324          - indicates 64-bit operand size requiring REX prefix.
 120  * \325          - instruction which always uses spl/bpl/sil/dil
 121  * \330          - a literal byte follows in the code stream, to be added
 122  *                 to the condition code value of the instruction.
 123  * \331          - instruction not valid with REP prefix.  Hint for
 124  *                 disassembler only; for SSE instructions.
 125  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 126  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 127  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 128  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  129  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  130  * \337          - force a REPNE prefix (0xF2) even if not specified.
 131  *                 \336-\337 are still listed as prefixes in the disassembler.
 132  * \340          - reserve <operand 0> bytes of uninitialized storage.
 133  *                 Operand 0 had better be a segmentless constant.
 134  * \341          - this instruction needs a WAIT "prefix"
 135  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 136  *                 (POP is never used for CS) depending on operand 0
 137  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 138  *                 on operand 0
 139  * \360          - no SSE prefix (== \364\331)
 140  * \361          - 66 SSE prefix (== \366\331)
 141  * \362          - F2 SSE prefix (== \364\332)
 142  * \363          - F3 SSE prefix (== \364\333)
 143  * \364          - operand-size prefix (0x66) not permitted
 144  * \365          - address-size prefix (0x67) not permitted
 145  * \366          - operand-size prefix (0x66) used as opcode extension
 146  * \367          - address-size prefix (0x67) used as opcode extension
 147  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 148  *                 370 is used for Jcc, 371 is used for JMP.
 149  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 150  *                 used for conditional jump over longer jump
 151  */
 152
 153 #include "compiler.h"
 154
 155 #include <stdio.h>
 156 #include <string.h>
 157 #include <inttypes.h>
 158
 159 #include "nasm.h"
 160 #include "nasmlib.h"
 161 #include "assemble.h"
 162 #include "insns.h"
 163 #include "tables.h"
 164
 165 enum match_result {
 166     /*
 167      * Matching errors.  These should be sorted so that more specific
 168      * errors come later in the sequence.
 169      */
 170     MERR_INVALOP,
 171     MERR_OPSIZEMISSING,
 172     MERR_OPSIZEMISMATCH,
 173     MERR_BADCPU,
 174     MERR_BADMODE,
 175     /*
 176      * Matching success; the conditional ones first
 177      */
 178     MOK_JUMP,                   /* Matching OK but needs jmp_match() */
 179     MOK_GOOD                    /* Matching unconditionally OK */
 180 };
 181
 182 typedef struct {
 183     int sib_present;                 /* is a SIB byte necessary? */
 184     int bytes;                       /* # of bytes of offset needed */
 185     int size;                        /* lazy - this is sib+bytes+1 */
 186     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 187 } ea;
 188
 189 static uint32_t cpu;            /* cpu level received from nasm.c */
 190 static efunc errfunc;
 191 static struct ofmt *outfmt;
 192 static ListGen *list;
 193
 194 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 195 static void gencode(int32_t segment, int64_t offset, int bits,
 196                     insn * ins, const struct itemplate *temp,
 197                     int64_t insn_end);
 198 static enum match_result find_match(const struct itemplate **tempp,
 199                                     insn *instruction,
 200                                     int32_t segment, int64_t offset, int bits);
 201 static enum match_result matches(const struct itemplate *, insn *, int bits);
 202 static opflags_t regflag(const operand *);
 203 static int32_t regval(const operand *);
 204 static int rexflags(int, opflags_t, int);
 205 static int op_rexflags(const operand *, int);
 206 static ea *process_ea(operand *, ea *, int, int, int, opflags_t);
 207 static void add_asp(insn *, int);
 208
 209 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 210 {
 211     return ins->prefixes[pos] == prefix;
 212 }
 213
 214 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 215 {
 216     if (ins->prefixes[pos])
 217         errfunc(ERR_NONFATAL, "invalid %s prefix",
 218                 prefix_name(ins->prefixes[pos]));
 219 }
 220
 221 static const char *size_name(int size)
 222 {
 223     switch (size) {
 224     case 1:
 225         return "byte";
 226     case 2:
 227         return "word";
 228     case 4:
 229         return "dword";
 230     case 8:
 231         return "qword";
 232     case 10:
 233         return "tword";
 234     case 16:
 235         return "oword";
 236     case 32:
 237         return "yword";
 238     default:
 239         return "???";
 240     }
 241 }
 242
 243 static void warn_overflow(int pass, int size)
 244 {
 245     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 246             "%s data exceeds bounds", size_name(size));
 247 }
 248
 249 static void warn_overflow_const(int64_t data, int size)
 250 {
 251     if (overflow_general(data, size))
 252         warn_overflow(ERR_PASS1, size);
 253 }
 254
 255 static void warn_overflow_opd(const struct operand *o, int size)
 256 {
 257     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 258         if (overflow_general(o->offset, size))
 259             warn_overflow(ERR_PASS2, size);
 260     }
 261 }
 262
 263 /*
 264  * This routine wrappers the real output format's output routine,
 265  * in order to pass a copy of the data off to the listing file
 266  * generator at the same time.
 267  */
 268 static void out(int64_t offset, int32_t segto, const void *data,
 269                 enum out_type type, uint64_t size,
 270                 int32_t segment, int32_t wrt)
 271 {
 272     static int32_t lineno = 0;     /* static!!! */
 273     static char *lnfname = NULL;
 274     uint8_t p[8];
 275
 276     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 277         /*
 278          * This is a non-relocated address, and we're going to
 279          * convert it into RAWDATA format.
 280          */
 281         uint8_t *q = p;
 282
 283         if (size > 8) {
 284             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 285             return;
 286         }
 287
 288         WRITEADDR(q, *(int64_t *)data, size);
 289         data = p;
 290         type = OUT_RAWDATA;
 291     }
 292
 293     list->output(offset, data, type, size);
 294
 295     /*
 296      * this call to src_get determines when we call the
 297      * debug-format-specific "linenum" function
 298      * it updates lineno and lnfname to the current values
 299      * returning 0 if "same as last time", -2 if lnfname
 300      * changed, and the amount by which lineno changed,
 301      * if it did. thus, these variables must be static
 302      */
 303
 304     if (src_get(&lineno, &lnfname)) {
 305         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 306     }
 307
 308     outfmt->output(segto, data, type, size, segment, wrt);
 309 }
 310
 311 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 312                      insn * ins, const uint8_t *code)
 313 {
 314     int64_t isize;
 315     uint8_t c = code[0];
 316
 317     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 318         return false;
 319     if (!optimizing)
 320         return false;
 321     if (optimizing < 0 && c == 0371)
 322         return false;
 323
 324     isize = calcsize(segment, offset, bits, ins, code);
 325
 326     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 327         /* Be optimistic in pass 1 */
 328         return true;
 329
 330     if (ins->oprs[0].segment != segment)
 331         return false;
 332
 333     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 334     return (isize >= -128 && isize <= 127); /* is it byte size? */
 335 }
 336
 337 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 338                  insn * instruction, struct ofmt *output, efunc error,
 339                  ListGen * listgen)
 340 {
 341     const struct itemplate *temp;
 342     int j;
 343     enum match_result m;
 344     int64_t insn_end;
 345     int32_t itimes;
 346     int64_t start = offset;
 347     int64_t wsize;              /* size for DB etc. */
 348
 349     errfunc = error;            /* to pass to other functions */
 350     cpu = cp;
 351     outfmt = output;            /* likewise */
 352     list = listgen;             /* and again */
 353
 354     wsize = idata_bytes(instruction->opcode);
 355     if (wsize == -1)
 356         return 0;
 357
 358     if (wsize) {
 359         extop *e;
 360         int32_t t = instruction->times;
 361         if (t < 0)
 362             errfunc(ERR_PANIC,
 363                     "instruction->times < 0 (%ld) in assemble()", t);
 364
 365         while (t--) {           /* repeat TIMES times */
 366             list_for_each(e, instruction->eops) {
 367                 if (e->type == EOT_DB_NUMBER) {
 368                     if (wsize > 8) {
 369                         errfunc(ERR_NONFATAL,
 370                                 "integer supplied to a DT, DO or DY"
 371                                 " instruction");
 372                     } else {
 373                         out(offset, segment, &e->offset,
 374                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 375                         offset += wsize;
 376                     }
 377                 } else if (e->type == EOT_DB_STRING ||
 378                            e->type == EOT_DB_STRING_FREE) {
 379                     int align;
 380
 381                     out(offset, segment, e->stringval,
 382                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 383                     align = e->stringlen % wsize;
 384
 385                     if (align) {
 386                         align = wsize - align;
 387                         out(offset, segment, zero_buffer,
 388                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 389                     }
 390                     offset += e->stringlen + align;
 391                 }
 392             }
 393             if (t > 0 && t == instruction->times - 1) {
 394                 /*
 395                  * Dummy call to list->output to give the offset to the
 396                  * listing module.
 397                  */
 398                 list->output(offset, NULL, OUT_RAWDATA, 0);
 399                 list->uplevel(LIST_TIMES);
 400             }
 401         }
 402         if (instruction->times > 1)
 403             list->downlevel(LIST_TIMES);
 404         return offset - start;
 405     }
 406
 407     if (instruction->opcode == I_INCBIN) {
 408         const char *fname = instruction->eops->stringval;
 409         FILE *fp;
 410
 411         fp = fopen(fname, "rb");
 412         if (!fp) {
 413             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 414                   fname);
 415         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 416             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 417                   fname);
 418         } else {
 419             static char buf[4096];
 420             size_t t = instruction->times;
 421             size_t base = 0;
 422             size_t len;
 423
 424             len = ftell(fp);
 425             if (instruction->eops->next) {
 426                 base = instruction->eops->next->offset;
 427                 len -= base;
 428                 if (instruction->eops->next->next &&
 429                     len > (size_t)instruction->eops->next->next->offset)
 430                     len = (size_t)instruction->eops->next->next->offset;
 431             }
 432             /*
 433              * Dummy call to list->output to give the offset to the
 434              * listing module.
 435              */
 436             list->output(offset, NULL, OUT_RAWDATA, 0);
 437             list->uplevel(LIST_INCBIN);
 438             while (t--) {
 439                 size_t l;
 440
 441                 fseek(fp, base, SEEK_SET);
 442                 l = len;
 443                 while (l > 0) {
 444                     int32_t m;
 445                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 446                     if (!m) {
 447                         /*
 448                          * This shouldn't happen unless the file
 449                          * actually changes while we are reading
 450                          * it.
 451                          */
 452                         error(ERR_NONFATAL,
 453                               "`incbin': unexpected EOF while"
 454                               " reading file `%s'", fname);
 455                         t = 0;  /* Try to exit cleanly */
 456                         break;
 457                     }
 458                     out(offset, segment, buf, OUT_RAWDATA, m,
 459                         NO_SEG, NO_SEG);
 460                     l -= m;
 461                 }
 462             }
 463             list->downlevel(LIST_INCBIN);
 464             if (instruction->times > 1) {
 465                 /*
 466                  * Dummy call to list->output to give the offset to the
 467                  * listing module.
 468                  */
 469                 list->output(offset, NULL, OUT_RAWDATA, 0);
 470                 list->uplevel(LIST_TIMES);
 471                 list->downlevel(LIST_TIMES);
 472             }
 473             fclose(fp);
 474             return instruction->times * len;
 475         }
 476         return 0;               /* if we're here, there's an error */
 477     }
 478
 479     /* Check to see if we need an address-size prefix */
 480     add_asp(instruction, bits);
 481
 482     m = find_match(&temp, instruction, segment, offset, bits);
 483
 484     if (m == MOK_GOOD) {
 485         /* Matches! */
 486         int64_t insn_size = calcsize(segment, offset, bits,
 487                                      instruction, temp->code);
 488         itimes = instruction->times;
 489         if (insn_size < 0)  /* shouldn't be, on pass two */
 490             error(ERR_PANIC, "errors made it through from pass one");
 491         else
 492             while (itimes--) {
 493                 for (j = 0; j < MAXPREFIX; j++) {
 494                     uint8_t c = 0;
 495                     switch (instruction->prefixes[j]) {
 496                     case P_WAIT:
 497                         c = 0x9B;
 498                         break;
 499                     case P_LOCK:
 500                         c = 0xF0;
 501                         break;
 502                     case P_REPNE:
 503                     case P_REPNZ:
 504                         c = 0xF2;
 505                         break;
 506                     case P_REPE:
 507                     case P_REPZ:
 508                     case P_REP:
 509                         c = 0xF3;
 510                         break;
 511                     case R_CS:
 512                         if (bits == 64) {
 513                             error(ERR_WARNING | ERR_PASS2,
 514                                   "cs segment base generated, but will be ignored in 64-bit mode");
 515                         }
 516                         c = 0x2E;
 517                         break;
 518                     case R_DS:
 519                         if (bits == 64) {
 520                             error(ERR_WARNING | ERR_PASS2,
 521                                   "ds segment base generated, but will be ignored in 64-bit mode");
 522                         }
 523                         c = 0x3E;
 524                         break;
 525                     case R_ES:
 526                         if (bits == 64) {
 527                             error(ERR_WARNING | ERR_PASS2,
 528                                   "es segment base generated, but will be ignored in 64-bit mode");
 529                         }
 530                         c = 0x26;
 531                         break;
 532                     case R_FS:
 533                         c = 0x64;
 534                         break;
 535                     case R_GS:
 536                         c = 0x65;
 537                         break;
 538                     case R_SS:
 539                         if (bits == 64) {
 540                             error(ERR_WARNING | ERR_PASS2,
 541                                   "ss segment base generated, but will be ignored in 64-bit mode");
 542                         }
 543                         c = 0x36;
 544                         break;
 545                     case R_SEGR6:
 546                     case R_SEGR7:
 547                         error(ERR_NONFATAL,
 548                               "segr6 and segr7 cannot be used as prefixes");
 549                         break;
 550                     case P_A16:
 551                         if (bits == 64) {
 552                             error(ERR_NONFATAL,
 553                                   "16-bit addressing is not supported "
 554                                   "in 64-bit mode");
 555                         } else if (bits != 16)
 556                             c = 0x67;
 557                         break;
 558                     case P_A32:
 559                         if (bits != 32)
 560                             c = 0x67;
 561                         break;
 562                     case P_A64:
 563                         if (bits != 64) {
 564                             error(ERR_NONFATAL,
 565                                   "64-bit addressing is only supported "
 566                                   "in 64-bit mode");
 567                         }
 568                         break;
 569                     case P_ASP:
 570                         c = 0x67;
 571                         break;
 572                     case P_O16:
 573                         if (bits != 16)
 574                             c = 0x66;
 575                         break;
 576                     case P_O32:
 577                         if (bits == 16)
 578                             c = 0x66;
 579                         break;
 580                     case P_O64:
 581                         /* REX.W */
 582                         break;
 583                     case P_OSP:
 584                         c = 0x66;
 585                         break;
 586                     case P_none:
 587                         break;
 588                     default:
 589                         error(ERR_PANIC, "invalid instruction prefix");
 590                     }
 591                     if (c != 0) {
 592                         out(offset, segment, &c, OUT_RAWDATA, 1,
 593                             NO_SEG, NO_SEG);
 594                         offset++;
 595                     }
 596                 }
 597                 insn_end = offset + insn_size;
 598                 gencode(segment, offset, bits, instruction,
 599                         temp, insn_end);
 600                 offset += insn_size;
 601                 if (itimes > 0 && itimes == instruction->times - 1) {
 602                     /*
 603                      * Dummy call to list->output to give the offset to the
 604                      * listing module.
 605                      */
 606                     list->output(offset, NULL, OUT_RAWDATA, 0);
 607                     list->uplevel(LIST_TIMES);
 608                 }
 609             }
 610         if (instruction->times > 1)
 611             list->downlevel(LIST_TIMES);
 612         return offset - start;
 613     } else {
 614         /* No match */
 615         switch (m) {
 616         case MERR_OPSIZEMISSING:
 617             error(ERR_NONFATAL, "operation size not specified");
 618             break;
 619         case MERR_OPSIZEMISMATCH:
 620             error(ERR_NONFATAL, "mismatch in operand sizes");
 621             break;
 622         case MERR_BADCPU:
 623             error(ERR_NONFATAL, "no instruction for this cpu level");
 624             break;
 625         case MERR_BADMODE:
 626             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 627                   bits);
 628             break;
 629         default:
 630             error(ERR_NONFATAL,
 631                   "invalid combination of opcode and operands");
 632             break;
 633         }
 634     }
 635     return 0;
 636 }
 637
 638 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 639                   insn * instruction, efunc error)
 640 {
 641     const struct itemplate *temp;
 642     enum match_result m;
 643
 644     errfunc = error;            /* to pass to other functions */
 645     cpu = cp;
 646
 647     if (instruction->opcode == I_none)
 648         return 0;
 649
 650     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 651         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 652         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 653         instruction->opcode == I_DY) {
 654         extop *e;
 655         int32_t isize, osize, wsize;
 656
 657         isize = 0;
 658         wsize = idata_bytes(instruction->opcode);
 659
 660         list_for_each(e, instruction->eops) {
 661             int32_t align;
 662
 663             osize = 0;
 664             if (e->type == EOT_DB_NUMBER) {
 665                 osize = 1;
 666                 warn_overflow_const(e->offset, wsize);
 667             } else if (e->type == EOT_DB_STRING ||
 668                        e->type == EOT_DB_STRING_FREE)
 669                 osize = e->stringlen;
 670
 671             align = (-osize) % wsize;
 672             if (align < 0)
 673                 align += wsize;
 674             isize += osize + align;
 675         }
 676         return isize * instruction->times;
 677     }
 678
 679     if (instruction->opcode == I_INCBIN) {
 680         const char *fname = instruction->eops->stringval;
 681         FILE *fp;
 682         int64_t val = 0;
 683         size_t len;
 684
 685         fp = fopen(fname, "rb");
 686         if (!fp)
 687             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 688                   fname);
 689         else if (fseek(fp, 0L, SEEK_END) < 0)
 690             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 691                   fname);
 692         else {
 693             len = ftell(fp);
 694             if (instruction->eops->next) {
 695                 len -= instruction->eops->next->offset;
 696                 if (instruction->eops->next->next &&
 697                     len > (size_t)instruction->eops->next->next->offset) {
 698                     len = (size_t)instruction->eops->next->next->offset;
 699                 }
 700             }
 701             val = instruction->times * len;
 702         }
 703         if (fp)
 704             fclose(fp);
 705         return val;
 706     }
 707
 708     /* Check to see if we need an address-size prefix */
 709     add_asp(instruction, bits);
 710
 711     m = find_match(&temp, instruction, segment, offset, bits);
 712     if (m == MOK_GOOD) {
 713         /* we've matched an instruction. */
 714         int64_t isize;
 715         const uint8_t *codes = temp->code;
 716         int j;
 717
 718         isize = calcsize(segment, offset, bits, instruction, codes);
 719         if (isize < 0)
 720             return -1;
 721         for (j = 0; j < MAXPREFIX; j++) {
 722             switch (instruction->prefixes[j]) {
 723             case P_A16:
 724                 if (bits != 16)
 725                     isize++;
 726                 break;
 727             case P_A32:
 728                 if (bits != 32)
 729                     isize++;
 730                 break;
 731             case P_O16:
 732                 if (bits != 16)
 733                     isize++;
 734                 break;
 735             case P_O32:
 736                 if (bits == 16)
 737                     isize++;
 738                 break;
 739             case P_A64:
 740             case P_O64:
 741             case P_none:
 742                 break;
 743             default:
 744                 isize++;
 745                 break;
 746             }
 747         }
 748         return isize * instruction->times;
 749     } else {
 750         return -1;                  /* didn't match any instruction */
 751     }
 752 }
 753
 754 static bool possible_sbyte(operand *o)
 755 {
 756     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 757         !(o->opflags & OPFLAG_UNKNOWN) &&
 758         optimizing >= 0 && !(o->type & STRICT);
 759 }
 760
 761 /* check that opn[op]  is a signed byte of size 16 or 32 */
 762 static bool is_sbyte16(operand *o)
 763 {
 764     int16_t v;
 765
 766     if (!possible_sbyte(o))
 767         return false;
 768
 769     v = o->offset;
 770     return v >= -128 && v <= 127;
 771 }
 772
 773 static bool is_sbyte32(operand *o)
 774 {
 775     int32_t v;
 776
 777     if (!possible_sbyte(o))
 778         return false;
 779
 780     v = o->offset;
 781     return v >= -128 && v <= 127;
 782 }
 783
 784 /* Common construct */
 785 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 786
 787 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 788                         insn * ins, const uint8_t *codes)
 789 {
 790     int64_t length = 0;
 791     uint8_t c;
 792     int rex_mask = ~0;
 793     int op1, op2;
 794     struct operand *opx;
 795     uint8_t opex = 0;
 796
 797     ins->rex = 0;               /* Ensure REX is reset */
 798
 799     if (ins->prefixes[PPS_OSIZE] == P_O64)
 800         ins->rex |= REX_W;
 801
 802     (void)segment;              /* Don't warn that this parameter is unused */
 803     (void)offset;               /* Don't warn that this parameter is unused */
 804
 805     while (*codes) {
 806         c = *codes++;
 807         op1 = (c & 3) + ((opex & 1) << 2);
 808         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 809         opx = &ins->oprs[op1];
 810         opex = 0;               /* For the next iteration */
 811
 812         switch (c) {
 813         case 01:
 814         case 02:
 815         case 03:
 816         case 04:
 817             codes += c, length += c;
 818             break;
 819
 820         case 05:
 821         case 06:
 822         case 07:
 823             opex = c;
 824             break;
 825
 826         case4(010):
 827             ins->rex |=
 828                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 829             codes++, length++;
 830             break;
 831
 832         case4(014):
 833         case4(020):
 834         case4(024):
 835             length++;
 836             break;
 837
 838         case4(030):
 839             length += 2;
 840             break;
 841
 842         case4(034):
 843             if (opx->type & (BITS16 | BITS32 | BITS64))
 844                 length += (opx->type & BITS16) ? 2 : 4;
 845             else
 846                 length += (bits == 16) ? 2 : 4;
 847             break;
 848
 849         case4(040):
 850             length += 4;
 851             break;
 852
 853         case4(044):
 854             length += ins->addr_size >> 3;
 855             break;
 856
 857         case4(050):
 858             length++;
 859             break;
 860
 861         case4(054):
 862             length += 8; /* MOV reg64/imm */
 863             break;
 864
 865         case4(060):
 866             length += 2;
 867             break;
 868
 869         case4(064):
 870             if (opx->type & (BITS16 | BITS32 | BITS64))
 871                 length += (opx->type & BITS16) ? 2 : 4;
 872             else
 873                 length += (bits == 16) ? 2 : 4;
 874             break;
 875
 876         case4(070):
 877             length += 4;
 878             break;
 879
 880         case4(074):
 881             length += 2;
 882             break;
 883
 884         case4(0140):
 885             length += is_sbyte16(opx) ? 1 : 2;
 886             break;
 887
 888         case4(0144):
 889             codes++;
 890             length++;
 891             break;
 892
 893         case4(0150):
 894             length += is_sbyte32(opx) ? 1 : 4;
 895             break;
 896
 897         case4(0154):
 898             codes++;
 899             length++;
 900             break;
 901
 902         case4(0160):
 903             length++;
 904             ins->rex |= REX_D;
 905             ins->drexdst = regval(opx);
 906             break;
 907
 908         case4(0164):
 909             length++;
 910             ins->rex |= REX_D|REX_OC;
 911             ins->drexdst = regval(opx);
 912             break;
 913
 914         case 0171:
 915             break;
 916
 917         case 0172:
 918         case 0173:
 919         case 0174:
 920             codes++;
 921             length++;
 922             break;
 923
 924         case4(0250):
 925             length += is_sbyte32(opx) ? 1 : 4;
 926             break;
 927
 928         case4(0254):
 929             length += 4;
 930             break;
 931
 932         case4(0260):
 933             ins->rex |= REX_V;
 934             ins->drexdst = regval(opx);
 935             ins->vex_cm = *codes++;
 936             ins->vex_wlp = *codes++;
 937             break;
 938
 939         case 0270:
 940             ins->rex |= REX_V;
 941             ins->drexdst = 0;
 942             ins->vex_cm = *codes++;
 943             ins->vex_wlp = *codes++;
 944             break;
 945
 946         case4(0274):
 947             length++;
 948             break;
 949
 950         case4(0300):
 951             break;
 952
 953         case 0310:
 954             if (bits == 64)
 955                 return -1;
 956             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 957             break;
 958
 959         case 0311:
 960             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 961             break;
 962
 963         case 0312:
 964             break;
 965
 966         case 0313:
 967             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 968                 has_prefix(ins, PPS_ASIZE, P_A32))
 969                 return -1;
 970             break;
 971
 972         case4(0314):
 973             break;
 974
 975         case 0320:
 976             length += (bits != 16);
 977             break;
 978
 979         case 0321:
 980             length += (bits == 16);
 981             break;
 982
 983         case 0322:
 984             break;
 985
 986         case 0323:
 987             rex_mask &= ~REX_W;
 988             break;
 989
 990         case 0324:
 991             ins->rex |= REX_W;
 992             break;
 993
 994         case 0325:
 995             ins->rex |= REX_NH;
 996             break;
 997
 998         case 0330:
 999             codes++, length++;
1000             break;
1001
1002         case 0331:
1003             break;
1004
1005         case 0332:
1006         case 0333:
1007             length++;
1008             break;
1009
1010         case 0334:
1011             ins->rex |= REX_L;
1012             break;
1013
1014         case 0335:
1015             break;
1016
1017         case 0336:
1018             if (!ins->prefixes[PPS_LREP])
1019                 ins->prefixes[PPS_LREP] = P_REP;
1020             break;
1021
1022         case 0337:
1023             if (!ins->prefixes[PPS_LREP])
1024                 ins->prefixes[PPS_LREP] = P_REPNE;
1025             break;
1026
1027         case 0340:
1028             if (ins->oprs[0].segment != NO_SEG)
1029                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1030                         " quantity of BSS space");
1031             else
1032                 length += ins->oprs[0].offset;
1033             break;
1034
1035         case 0341:
1036             if (!ins->prefixes[PPS_WAIT])
1037                 ins->prefixes[PPS_WAIT] = P_WAIT;
1038             break;
1039
1040         case4(0344):
1041             length++;
1042             break;
1043
1044         case 0360:
1045             break;
1046
1047         case 0361:
1048         case 0362:
1049         case 0363:
1050             length++;
1051             break;
1052
1053         case 0364:
1054         case 0365:
1055             break;
1056
1057         case 0366:
1058         case 0367:
1059             length++;
1060             break;
1061
1062         case 0370:
1063         case 0371:
1064         case 0372:
1065             break;
1066
1067         case 0373:
1068             length++;
1069             break;
1070
1071         case4(0100):
1072         case4(0110):
1073         case4(0120):
1074         case4(0130):
1075         case4(0200):
1076         case4(0204):
1077         case4(0210):
1078         case4(0214):
1079         case4(0220):
1080         case4(0224):
1081         case4(0230):
1082         case4(0234):
1083             {
1084                 ea ea_data;
1085                 int rfield;
1086                 opflags_t rflags;
1087                 struct operand *opy = &ins->oprs[op2];
1088
1089                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1090
1091                 if (c <= 0177) {
1092                     /* pick rfield from operand b (opx) */
1093                     rflags = regflag(opx);
1094                     rfield = nasm_regvals[opx->basereg];
1095                 } else {
1096                     rflags = 0;
1097                     rfield = c & 7;
1098                 }
1099                 if (!process_ea(opy, &ea_data, bits,
1100                                 ins->addr_size, rfield, rflags)) {
1101                     errfunc(ERR_NONFATAL, "invalid effective address");
1102                     return -1;
1103                 } else {
1104                     ins->rex |= ea_data.rex;
1105                     length += ea_data.size;
1106                 }
1107             }
1108             break;
1109
1110         default:
1111             errfunc(ERR_PANIC, "internal instruction table corrupt"
1112                     ": instruction code \\%o (0x%02X) given", c, c);
1113             break;
1114         }
1115     }
1116
1117     ins->rex &= rex_mask;
1118
1119     if (ins->rex & REX_NH) {
1120         if (ins->rex & REX_H) {
1121             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1122             return -1;
1123         }
1124         ins->rex &= ~REX_P;     /* Don't force REX prefix due to high reg */
1125     }
1126
1127     if (ins->rex & REX_V) {
1128         int bad32 = REX_R|REX_W|REX_X|REX_B;
1129
1130         if (ins->rex & REX_H) {
1131             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1132             return -1;
1133         }
1134         switch (ins->vex_wlp & 030) {
1135         case 000:
1136         case 020:
1137             ins->rex &= ~REX_W;
1138             break;
1139         case 010:
1140             ins->rex |= REX_W;
1141             bad32 &= ~REX_W;
1142             break;
1143         case 030:
1144             /* Follow REX_W */
1145             break;
1146         }
1147
1148         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1149             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1150             return -1;
1151         }
1152         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1153             length += 3;
1154         else
1155             length += 2;
1156     } else if (ins->rex & REX_D) {
1157         if (ins->rex & REX_H) {
1158             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1159             return -1;
1160         }
1161         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1162                            ins->drexdst > 7)) {
1163             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1164             return -1;
1165         }
1166         length++;
1167     } else if (ins->rex & REX_REAL) {
1168         if (ins->rex & REX_H) {
1169             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1170             return -1;
1171         } else if (bits == 64) {
1172             length++;
1173         } else if ((ins->rex & REX_L) &&
1174                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1175                    cpu >= IF_X86_64) {
1176             /* LOCK-as-REX.R */
1177             assert_no_prefix(ins, PPS_LREP);
1178             length++;
1179         } else {
1180             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1181             return -1;
1182         }
1183     }
1184
1185     return length;
1186 }
1187
/*
 * Emit the pending REX prefix byte, if one is due.
 *
 * Expands in a context that provides `ins', `bits', `offset' and
 * `segment'.  When the instruction is not DREX/VEX encoded, has any of
 * the REX_REAL bits set and we are in 64-bit mode, one byte
 * ((rex & REX_REAL) | REX_P) is written via out(), ins->rex is cleared so
 * the prefix is emitted only once, and `offset' advances by one.
 *
 * Wrapped in do { } while (0) so that `EMIT_REX();' is a single
 * statement and is safe as the body of an if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&     \
            (bits == 64)) {                                             \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1195
1196 static void gencode(int32_t segment, int64_t offset, int bits,
1197                     insn * ins, const struct itemplate *temp,
1198                     int64_t insn_end)
1199 {
1200     static char condval[] = {   /* conditional opcodes */
1201         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1202         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1203         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1204     };
1205     uint8_t c;
1206     uint8_t bytes[4];
1207     int64_t size;
1208     int64_t data;
1209     int op1, op2;
1210     struct operand *opx;
1211     const uint8_t *codes = temp->code;
1212     uint8_t opex = 0;
1213
1214     while (*codes) {
1215         c = *codes++;
1216         op1 = (c & 3) + ((opex & 1) << 2);
1217         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1218         opx = &ins->oprs[op1];
1219         opex = 0;               /* For the next iteration */
1220
1221         switch (c) {
1222         case 01:
1223         case 02:
1224         case 03:
1225         case 04:
1226             EMIT_REX();
1227             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1228             codes += c;
1229             offset += c;
1230             break;
1231
1232         case 05:
1233         case 06:
1234         case 07:
1235             opex = c;
1236             break;
1237
1238         case4(010):
1239             EMIT_REX();
1240             bytes[0] = *codes++ + (regval(opx) & 7);
1241             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1242             offset += 1;
1243             break;
1244
1245         case4(014):
1246             /* The test for BITS8 and SBYTE here is intended to avoid
1247                warning on optimizer actions due to SBYTE, while still
1248                warn on explicit BYTE directives.  Also warn, obviously,
1249                if the optimizer isn't enabled. */
1250             if (((opx->type & BITS8) ||
1251                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1252                 (opx->offset < -128 || opx->offset > 127)) {
1253                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1254                         "signed byte value exceeds bounds");
1255             }
1256             if (opx->segment != NO_SEG) {
1257                 data = opx->offset;
1258                 out(offset, segment, &data, OUT_ADDRESS, 1,
1259                     opx->segment, opx->wrt);
1260             } else {
1261                 bytes[0] = opx->offset;
1262                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1263                     NO_SEG);
1264             }
1265             offset += 1;
1266             break;
1267
1268         case4(020):
1269             if (opx->offset < -256 || opx->offset > 255) {
1270                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1271                         "byte value exceeds bounds");
1272             }
1273             if (opx->segment != NO_SEG) {
1274                 data = opx->offset;
1275                 out(offset, segment, &data, OUT_ADDRESS, 1,
1276                     opx->segment, opx->wrt);
1277             } else {
1278                 bytes[0] = opx->offset;
1279                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1280                     NO_SEG);
1281             }
1282             offset += 1;
1283             break;
1284
1285         case4(024):
1286             if (opx->offset < 0 || opx->offset > 255)
1287                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1288                         "unsigned byte value exceeds bounds");
1289             if (opx->segment != NO_SEG) {
1290                 data = opx->offset;
1291                 out(offset, segment, &data, OUT_ADDRESS, 1,
1292                     opx->segment, opx->wrt);
1293             } else {
1294                 bytes[0] = opx->offset;
1295                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1296                     NO_SEG);
1297             }
1298             offset += 1;
1299             break;
1300
1301         case4(030):
1302             warn_overflow_opd(opx, 2);
1303             data = opx->offset;
1304             out(offset, segment, &data, OUT_ADDRESS, 2,
1305                 opx->segment, opx->wrt);
1306             offset += 2;
1307             break;
1308
1309         case4(034):
1310             if (opx->type & (BITS16 | BITS32))
1311                 size = (opx->type & BITS16) ? 2 : 4;
1312             else
1313                 size = (bits == 16) ? 2 : 4;
1314             warn_overflow_opd(opx, size);
1315             data = opx->offset;
1316             out(offset, segment, &data, OUT_ADDRESS, size,
1317                 opx->segment, opx->wrt);
1318             offset += size;
1319             break;
1320
1321         case4(040):
1322             warn_overflow_opd(opx, 4);
1323             data = opx->offset;
1324             out(offset, segment, &data, OUT_ADDRESS, 4,
1325                 opx->segment, opx->wrt);
1326             offset += 4;
1327             break;
1328
1329         case4(044):
1330             data = opx->offset;
1331             size = ins->addr_size >> 3;
1332             warn_overflow_opd(opx, size);
1333             out(offset, segment, &data, OUT_ADDRESS, size,
1334                 opx->segment, opx->wrt);
1335             offset += size;
1336             break;
1337
1338         case4(050):
1339             if (opx->segment != segment) {
1340                 data = opx->offset;
1341                 out(offset, segment, &data,
1342                     OUT_REL1ADR, insn_end - offset,
1343                     opx->segment, opx->wrt);
1344             } else {
1345                 data = opx->offset - insn_end;
1346                 if (data > 127 || data < -128)
1347                     errfunc(ERR_NONFATAL, "short jump is out of range");
1348                 out(offset, segment, &data,
1349                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1350             }
1351             offset += 1;
1352             break;
1353
1354         case4(054):
1355             data = (int64_t)opx->offset;
1356             out(offset, segment, &data, OUT_ADDRESS, 8,
1357                 opx->segment, opx->wrt);
1358             offset += 8;
1359             break;
1360
1361         case4(060):
1362             if (opx->segment != segment) {
1363                 data = opx->offset;
1364                 out(offset, segment, &data,
1365                     OUT_REL2ADR, insn_end - offset,
1366                     opx->segment, opx->wrt);
1367             } else {
1368                 data = opx->offset - insn_end;
1369                 out(offset, segment, &data,
1370                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1371             }
1372             offset += 2;
1373             break;
1374
1375         case4(064):
1376             if (opx->type & (BITS16 | BITS32 | BITS64))
1377                 size = (opx->type & BITS16) ? 2 : 4;
1378             else
1379                 size = (bits == 16) ? 2 : 4;
1380             if (opx->segment != segment) {
1381                 data = opx->offset;
1382                 out(offset, segment, &data,
1383                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1384                     insn_end - offset, opx->segment, opx->wrt);
1385             } else {
1386                 data = opx->offset - insn_end;
1387                 out(offset, segment, &data,
1388                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1389             }
1390             offset += size;
1391             break;
1392
1393         case4(070):
1394             if (opx->segment != segment) {
1395                 data = opx->offset;
1396                 out(offset, segment, &data,
1397                     OUT_REL4ADR, insn_end - offset,
1398                     opx->segment, opx->wrt);
1399             } else {
1400                 data = opx->offset - insn_end;
1401                 out(offset, segment, &data,
1402                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1403             }
1404             offset += 4;
1405             break;
1406
1407         case4(074):
1408             if (opx->segment == NO_SEG)
1409                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1410                         " relocatable");
1411             data = 0;
1412             out(offset, segment, &data, OUT_ADDRESS, 2,
1413                 outfmt->segbase(1 + opx->segment),
1414                 opx->wrt);
1415             offset += 2;
1416             break;
1417
1418         case4(0140):
1419             data = opx->offset;
1420             warn_overflow_opd(opx, 2);
1421             if (is_sbyte16(opx)) {
1422                 bytes[0] = data;
1423                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1424                     NO_SEG);
1425                 offset++;
1426             } else {
1427                 out(offset, segment, &data, OUT_ADDRESS, 2,
1428                     opx->segment, opx->wrt);
1429                 offset += 2;
1430             }
1431             break;
1432
1433         case4(0144):
1434             EMIT_REX();
1435             bytes[0] = *codes++;
1436             if (is_sbyte16(opx))
1437                 bytes[0] |= 2;  /* s-bit */
1438             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1439             offset++;
1440             break;
1441
1442         case4(0150):
1443             data = opx->offset;
1444             warn_overflow_opd(opx, 4);
1445             if (is_sbyte32(opx)) {
1446                 bytes[0] = data;
1447                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1448                     NO_SEG);
1449                 offset++;
1450             } else {
1451                 out(offset, segment, &data, OUT_ADDRESS, 4,
1452                     opx->segment, opx->wrt);
1453                 offset += 4;
1454             }
1455             break;
1456
1457         case4(0154):
1458             EMIT_REX();
1459             bytes[0] = *codes++;
1460             if (is_sbyte32(opx))
1461                 bytes[0] |= 2;  /* s-bit */
1462             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1463             offset++;
1464             break;
1465
1466         case4(0160):
1467         case4(0164):
1468             break;
1469
1470         case 0171:
1471             bytes[0] =
1472                 (ins->drexdst << 4) |
1473                 (ins->rex & REX_OC ? 0x08 : 0) |
1474                 (ins->rex & (REX_R|REX_X|REX_B));
1475             ins->rex = 0;
1476             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1477             offset++;
1478             break;
1479
1480         case 0172:
1481             c = *codes++;
1482             opx = &ins->oprs[c >> 3];
1483             bytes[0] = nasm_regvals[opx->basereg] << 4;
1484             opx = &ins->oprs[c & 7];
1485             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1486                 errfunc(ERR_NONFATAL,
1487                         "non-absolute expression not permitted as argument %d",
1488                         c & 7);
1489             } else {
1490                 if (opx->offset & ~15) {
1491                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1492                             "four-bit argument exceeds bounds");
1493                 }
1494                 bytes[0] |= opx->offset & 15;
1495             }
1496             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1497             offset++;
1498             break;
1499
1500         case 0173:
1501             c = *codes++;
1502             opx = &ins->oprs[c >> 4];
1503             bytes[0] = nasm_regvals[opx->basereg] << 4;
1504             bytes[0] |= c & 15;
1505             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1506             offset++;
1507             break;
1508
1509         case 0174:
1510             c = *codes++;
1511             opx = &ins->oprs[c];
1512             bytes[0] = nasm_regvals[opx->basereg] << 4;
1513             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1514             offset++;
1515             break;
1516
1517         case4(0250):
1518             data = opx->offset;
1519             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1520                 (int32_t)data != (int64_t)data) {
1521                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1522                         "signed dword immediate exceeds bounds");
1523             }
1524             if (is_sbyte32(opx)) {
1525                 bytes[0] = data;
1526                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1527                     NO_SEG);
1528                 offset++;
1529             } else {
1530                 out(offset, segment, &data, OUT_ADDRESS, 4,
1531                     opx->segment, opx->wrt);
1532                 offset += 4;
1533             }
1534             break;
1535
1536         case4(0254):
1537             data = opx->offset;
1538             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1539                 (int32_t)data != (int64_t)data) {
1540                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1541                         "signed dword immediate exceeds bounds");
1542             }
1543             out(offset, segment, &data, OUT_ADDRESS, 4,
1544                 opx->segment, opx->wrt);
1545             offset += 4;
1546             break;
1547
1548         case4(0260):
1549         case 0270:
1550             codes += 2;
1551             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1552                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1553                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1554                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1555                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1556                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1557                 offset += 3;
1558             } else {
1559                 bytes[0] = 0xc5;
1560                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1561                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1562                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1563                 offset += 2;
1564             }
1565             break;
1566
1567         case4(0274):
1568         {
1569             uint64_t uv, um;
1570             int s;
1571
1572             if (ins->rex & REX_W)
1573                 s = 64;
1574             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1575                 s = 16;
1576             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1577                 s = 32;
1578             else
1579                 s = bits;
1580
1581             um = (uint64_t)2 << (s-1);
1582             uv = opx->offset;
1583
1584             if (uv > 127 && uv < (uint64_t)-128 &&
1585                 (uv < um-128 || uv > um-1)) {
1586                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1587                         "signed byte value exceeds bounds");
1588             }
1589             if (opx->segment != NO_SEG) {
1590                 data = uv;
1591                 out(offset, segment, &data, OUT_ADDRESS, 1,
1592                     opx->segment, opx->wrt);
1593             } else {
1594                 bytes[0] = uv;
1595                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1596                     NO_SEG);
1597             }
1598             offset += 1;
1599             break;
1600         }
1601
1602         case4(0300):
1603             break;
1604
1605         case 0310:
1606             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1607                 *bytes = 0x67;
1608                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1609                 offset += 1;
1610             } else
1611                 offset += 0;
1612             break;
1613
1614         case 0311:
1615             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1616                 *bytes = 0x67;
1617                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1618                 offset += 1;
1619             } else
1620                 offset += 0;
1621             break;
1622
1623         case 0312:
1624             break;
1625
1626         case 0313:
1627             ins->rex = 0;
1628             break;
1629
1630         case4(0314):
1631             break;
1632
1633         case 0320:
1634             if (bits != 16) {
1635                 *bytes = 0x66;
1636                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1637                 offset += 1;
1638             } else
1639                 offset += 0;
1640             break;
1641
1642         case 0321:
1643             if (bits == 16) {
1644                 *bytes = 0x66;
1645                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1646                 offset += 1;
1647             } else
1648                 offset += 0;
1649             break;
1650
1651         case 0322:
1652         case 0323:
1653             break;
1654
1655         case 0324:
1656             ins->rex |= REX_W;
1657             break;
1658
1659         case 0325:
1660             break;
1661
1662         case 0330:
1663             *bytes = *codes++ ^ condval[ins->condition];
1664             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1665             offset += 1;
1666             break;
1667
1668         case 0331:
1669             break;
1670
1671         case 0332:
1672         case 0333:
1673             *bytes = c - 0332 + 0xF2;
1674             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1675             offset += 1;
1676             break;
1677
1678         case 0334:
1679             if (ins->rex & REX_R) {
1680                 *bytes = 0xF0;
1681                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1682                 offset += 1;
1683             }
1684             ins->rex &= ~(REX_L|REX_R);
1685             break;
1686
1687         case 0335:
1688             break;
1689
1690         case 0336:
1691         case 0337:
1692             break;
1693
1694         case 0340:
1695             if (ins->oprs[0].segment != NO_SEG)
1696                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1697             else {
1698                 int64_t size = ins->oprs[0].offset;
1699                 if (size > 0)
1700                     out(offset, segment, NULL,
1701                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1702                 offset += size;
1703             }
1704             break;
1705
1706         case 0341:
1707             break;
1708
1709         case 0344:
1710         case 0345:
1711             bytes[0] = c & 1;
1712             switch (ins->oprs[0].basereg) {
1713             case R_CS:
1714                 bytes[0] += 0x0E;
1715                 break;
1716             case R_DS:
1717                 bytes[0] += 0x1E;
1718                 break;
1719             case R_ES:
1720                 bytes[0] += 0x06;
1721                 break;
1722             case R_SS:
1723                 bytes[0] += 0x16;
1724                 break;
1725             default:
1726                 errfunc(ERR_PANIC,
1727                         "bizarre 8086 segment register received");
1728             }
1729             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1730             offset++;
1731             break;
1732
1733         case 0346:
1734         case 0347:
1735             bytes[0] = c & 1;
1736             switch (ins->oprs[0].basereg) {
1737             case R_FS:
1738                 bytes[0] += 0xA0;
1739                 break;
1740             case R_GS:
1741                 bytes[0] += 0xA8;
1742                 break;
1743             default:
1744                 errfunc(ERR_PANIC,
1745                         "bizarre 386 segment register received");
1746             }
1747             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1748             offset++;
1749             break;
1750
1751         case 0360:
1752             break;
1753
1754         case 0361:
1755             bytes[0] = 0x66;
1756             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1757             offset += 1;
1758             break;
1759
1760         case 0362:
1761         case 0363:
1762             bytes[0] = c - 0362 + 0xf2;
1763             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1764             offset += 1;
1765             break;
1766
1767         case 0364:
1768         case 0365:
1769             break;
1770
1771         case 0366:
1772         case 0367:
1773             *bytes = c - 0366 + 0x66;
1774             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1775             offset += 1;
1776             break;
1777
1778         case 0370:
1779         case 0371:
1780         case 0372:
1781             break;
1782
1783         case 0373:
1784             *bytes = bits == 16 ? 3 : 5;
1785             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1786             offset += 1;
1787             break;
1788
1789         case4(0100):
1790         case4(0110):
1791         case4(0120):
1792         case4(0130):
1793         case4(0200):
1794         case4(0204):
1795         case4(0210):
1796         case4(0214):
1797         case4(0220):
1798         case4(0224):
1799         case4(0230):
1800         case4(0234):
1801             {
1802                 ea ea_data;
1803                 int rfield;
1804                 opflags_t rflags;
1805                 uint8_t *p;
1806                 int32_t s;
1807                 enum out_type type;
1808                 struct operand *opy = &ins->oprs[op2];
1809
1810                 if (c <= 0177) {
1811                     /* pick rfield from operand b (opx) */
1812                     rflags = regflag(opx);
1813                     rfield = nasm_regvals[opx->basereg];
1814                 } else {
1815                     /* rfield is constant */
1816                     rflags = 0;
1817                     rfield = c & 7;
1818                 }
1819
1820                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1821                                 rfield, rflags)) {
1822                     errfunc(ERR_NONFATAL, "invalid effective address");
1823                 }
1824
1825
1826                 p = bytes;
1827                 *p++ = ea_data.modrm;
1828                 if (ea_data.sib_present)
1829                     *p++ = ea_data.sib;
1830
1831                 /* DREX suffixes come between the SIB and the displacement */
1832                 if (ins->rex & REX_D) {
1833                     *p++ = (ins->drexdst << 4) |
1834                            (ins->rex & REX_OC ? 0x08 : 0) |
1835                            (ins->rex & (REX_R|REX_X|REX_B));
1836                     ins->rex = 0;
1837                 }
1838
1839                 s = p - bytes;
1840                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1841
1842                 /*
1843                  * Make sure the address gets the right offset in case
1844                  * the line breaks in the .lst file (BR 1197827)
1845                  */
1846                 offset += s;
1847                 s = 0;
1848
1849                 switch (ea_data.bytes) {
1850                 case 0:
1851                     break;
1852                 case 1:
1853                 case 2:
1854                 case 4:
1855                 case 8:
1856                     data = opy->offset;
1857                     s += ea_data.bytes;
1858                     if (ea_data.rip) {
1859                         if (opy->segment == segment) {
1860                             data -= insn_end;
1861                             if (overflow_signed(data, ea_data.bytes))
1862                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1863                             out(offset, segment, &data, OUT_ADDRESS,
1864                                 ea_data.bytes, NO_SEG, NO_SEG);
1865                         } else {
1866                             /* overflow check in output/linker? */
1867                             out(offset, segment, &data, OUT_REL4ADR,
1868                                 insn_end - offset, opy->segment, opy->wrt);
1869                         }
1870                     } else {
1871                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1872                             signed_bits(opy->offset, ins->addr_size) !=
1873                             signed_bits(opy->offset, ea_data.bytes * 8))
1874                             warn_overflow(ERR_PASS2, ea_data.bytes);
1875
1876                         type = OUT_ADDRESS;
1877                         out(offset, segment, &data, OUT_ADDRESS,
1878                             ea_data.bytes, opy->segment, opy->wrt);
1879                     }
1880                     break;
1881                 default:
1882                     /* Impossible! */
1883                     errfunc(ERR_PANIC,
1884                             "Invalid amount of bytes (%d) for offset?!",
1885                             ea_data.bytes);
1886                     break;
1887                 }
1888                 offset += s;
1889             }
1890             break;
1891
1892         default:
1893             errfunc(ERR_PANIC, "internal instruction table corrupt"
1894                     ": instruction code \\%o (0x%02X) given", c, c);
1895             break;
1896         }
1897     }
1898 }
1899
1900 static opflags_t regflag(const operand * o)
1901 {
1902     if (!is_register(o->basereg))
1903         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1904     return nasm_reg_flags[o->basereg];
1905 }
1906
1907 static int32_t regval(const operand * o)
1908 {
1909     if (!is_register(o->basereg))
1910         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1911     return nasm_regvals[o->basereg];
1912 }
1913
1914 static int op_rexflags(const operand * o, int mask)
1915 {
1916     opflags_t flags;
1917     int val;
1918
1919     if (!is_register(o->basereg))
1920         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1921
1922     flags = nasm_reg_flags[o->basereg];
1923     val = nasm_regvals[o->basereg];
1924
1925     return rexflags(val, flags, mask);
1926 }
1927
1928 static int rexflags(int val, opflags_t flags, int mask)
1929 {
1930     int rex = 0;
1931
1932     if (val >= 8)
1933         rex |= REX_B|REX_X|REX_R;
1934     if (flags & BITS64)
1935         rex |= REX_W;
1936     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1937         rex |= REX_H;
1938     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1939         rex |= REX_P;
1940
1941     return rex & mask;
1942 }
1943
1944 static enum match_result find_match(const struct itemplate **tempp,
1945                                     insn *instruction,
1946                                     int32_t segment, int64_t offset, int bits)
1947 {
1948     const struct itemplate *temp;
1949     enum match_result m, merr;
1950     opflags_t xsizeflags[MAX_OPERANDS];
1951     bool opsizemissing = false;
1952     int i;
1953
1954     for (i = 0; i < instruction->operands; i++)
1955         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1956
1957     merr = MERR_INVALOP;
1958
1959     for (temp = nasm_instructions[instruction->opcode];
1960          temp->opcode != I_none; temp++) {
1961         m = matches(temp, instruction, bits);
1962         if (m == MOK_JUMP) {
1963             if (jmp_match(segment, offset, bits, instruction, temp->code))
1964                 m = MOK_GOOD;
1965             else
1966                 m = MERR_INVALOP;
1967         } else if (m == MERR_OPSIZEMISSING &&
1968                    (temp->flags & IF_SMASK) != IF_SX) {
1969             /*
1970              * Missing operand size and a candidate for fuzzy matching...
1971              */
1972             for (i = 0; i < temp->operands; i++) {
1973                 if ((temp->opd[i] & SAME_AS) == 0)
1974                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1975             }
1976             opsizemissing = true;
1977         }
1978         if (m > merr)
1979             merr = m;
1980         if (merr == MOK_GOOD)
1981             goto done;
1982     }
1983
1984     /* No match, but see if we can get a fuzzy operand size match... */
1985     if (!opsizemissing)
1986         goto done;
1987
1988     for (i = 0; i < instruction->operands; i++) {
1989         /*
1990          * We ignore extrinsic operand sizes on registers, so we should
1991          * never try to fuzzy-match on them.  This also resolves the case
1992          * when we have e.g. "xmmrm128" in two different positions.
1993          */
1994         if (is_class(REGISTER, instruction->oprs[i].type))
1995             continue;
1996
1997         /* This tests if xsizeflags[i] has more than one bit set */
1998         if ((xsizeflags[i] & (xsizeflags[i]-1)))
1999             goto done;          /* No luck */
2000
2001         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2002     }
2003
2004     /* Try matching again... */
2005     for (temp = nasm_instructions[instruction->opcode];
2006          temp->opcode != I_none; temp++) {
2007         m = matches(temp, instruction, bits);
2008         if (m == MOK_JUMP) {
2009             if (jmp_match(segment, offset, bits, instruction, temp->code))
2010                 m = MOK_GOOD;
2011             else
2012                 m = MERR_INVALOP;
2013         }
2014         if (m > merr)
2015             merr = m;
2016         if (merr == MOK_GOOD)
2017             goto done;
2018     }
2019
2020 done:
2021     *tempp = temp;
2022     return merr;
2023 }
2024
2025 static enum match_result matches(const struct itemplate *itemp,
2026                                  insn *instruction, int bits)
2027 {
2028     int i, size[MAX_OPERANDS], asize, oprs;
2029     bool opsizemissing = false;
2030
2031     /*
2032      * Check the opcode
2033      */
2034     if (itemp->opcode != instruction->opcode)
2035         return MERR_INVALOP;
2036
2037     /*
2038      * Count the operands
2039      */
2040     if (itemp->operands != instruction->operands)
2041         return MERR_INVALOP;
2042
2043     /*
2044      * Check that no spurious colons or TOs are present
2045      */
2046     for (i = 0; i < itemp->operands; i++)
2047         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2048             return MERR_INVALOP;
2049
2050     /*
2051      * Process size flags
2052      */
2053     switch (itemp->flags & IF_SMASK) {
2054     case IF_SB:
2055         asize = BITS8;
2056         break;
2057     case IF_SW:
2058         asize = BITS16;
2059         break;
2060     case IF_SD:
2061         asize = BITS32;
2062         break;
2063     case IF_SQ:
2064         asize = BITS64;
2065         break;
2066     case IF_SO:
2067         asize = BITS128;
2068         break;
2069     case IF_SY:
2070         asize = BITS256;
2071         break;
2072     case IF_SZ:
2073         switch (bits) {
2074         case 16:
2075             asize = BITS16;
2076             break;
2077         case 32:
2078             asize = BITS32;
2079             break;
2080         case 64:
2081             asize = BITS64;
2082             break;
2083         default:
2084             asize = 0;
2085             break;
2086         }
2087         break;
2088     default:
2089         asize = 0;
2090         break;
2091     }
2092
2093     if (itemp->flags & IF_ARMASK) {
2094         /* S- flags only apply to a specific operand */
2095         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2096         memset(size, 0, sizeof size);
2097         size[i] = asize;
2098     } else {
2099         /* S- flags apply to all operands */
2100         for (i = 0; i < MAX_OPERANDS; i++)
2101             size[i] = asize;
2102     }
2103
2104     /*
2105      * Check that the operand flags all match up,
2106      * it's a bit tricky so lets be verbose:
2107      *
2108      * 1) Find out the size of operand. If instruction
2109      *    doesn't have one specified -- we're trying to
2110      *    guess it either from template (IF_S* flag) or
2111      *    from code bits.
2112      *
2113      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2114      *    (ie the same operand as was specified somewhere in template, and
2115      *    this referred operand index is being achieved via ~SAME_AS)
2116      *    we are to be sure that both registers (in template and instruction)
2117      *    do exactly match.
2118      *
2119      * 3) If template operand do not match the instruction OR
2120      *    template has an operand size specified AND this size differ
2121      *    from which instruction has (perhaps we got it from code bits)
2122      *    we are:
2123      *      a)  Check that only size of instruction and operand is differ
2124      *          other characteristics do match
2125      *      b)  Perhaps it's a register specified in instruction so
2126      *          for such a case we just mark that operand as "size
2127      *          missing" and this will turn on fuzzy operand size
2128      *          logic facility (handled by a caller)
2129      */
2130     for (i = 0; i < itemp->operands; i++) {
2131         opflags_t type = instruction->oprs[i].type;
2132         if (!(type & SIZE_MASK))
2133             type |= size[i];
2134
2135         if (itemp->opd[i] & SAME_AS) {
2136             int j = itemp->opd[i] & ~SAME_AS;
2137             if (type != instruction->oprs[j].type ||
2138                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2139                 return MERR_INVALOP;
2140         } else if (itemp->opd[i] & ~type ||
2141             ((itemp->opd[i] & SIZE_MASK) &&
2142              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2143             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2144                 return MERR_INVALOP;
2145             } else if (!is_class(REGISTER, type)) {
2146                 /*
2147                  * Note: we don't honor extrinsic operand sizes for registers,
2148                  * so "missing operand size" for a register should be
2149                  * considered a wildcard match rather than an error.
2150                  */
2151                 opsizemissing = true;
2152             }
2153         }
2154     }
2155
2156     if (opsizemissing)
2157         return MERR_OPSIZEMISSING;
2158
2159     /*
2160      * Check operand sizes
2161      */
2162     if (itemp->flags & (IF_SM | IF_SM2)) {
2163         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2164         for (i = 0; i < oprs; i++) {
2165             asize = itemp->opd[i] & SIZE_MASK;
2166             if (asize) {
2167                 for (i = 0; i < oprs; i++)
2168                     size[i] = asize;
2169                 break;
2170             }
2171         }
2172     } else {
2173         oprs = itemp->operands;
2174     }
2175
2176     for (i = 0; i < itemp->operands; i++) {
2177         if (!(itemp->opd[i] & SIZE_MASK) &&
2178             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2179             return MERR_OPSIZEMISMATCH;
2180     }
2181
2182     /*
2183      * Check template is okay at the set cpu level
2184      */
2185     if (((itemp->flags & IF_PLEVEL) > cpu))
2186         return MERR_BADCPU;
2187
2188     /*
2189      * Verify the appropriate long mode flag.
2190      */
2191     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2192         return MERR_BADMODE;
2193
2194     /*
2195      * Check if special handling needed for Jumps
2196      */
2197     if ((itemp->code[0] & 0374) == 0370)
2198         return MOK_JUMP;
2199
2200     return MOK_GOOD;
2201 }
2202
/*
 * process_ea: turn an operand into ModRM / SIB / displacement information.
 *
 *   input    - the operand to encode; in the pure-offset case its type
 *              may be modified (IP_REL cleared, MEMORY set)
 *   output   - receives modrm, sib_present, sib, bytes (number of
 *              displacement bytes), rip and size
 *   bits     - current mode; compared against 64 for the RIP-relative
 *              and absolute-address handling
 *   addrbits - address size in effect (16, 32 or 64)
 *   rfield   - value placed in bits 3..5 of the ModRM byte
 *   rflags   - register flags belonging to rfield, used for REX bits
 *
 * Returns `output` on success and NULL when the operand cannot be
 * encoded as an effective address.
 *
 * NOTE: output->rex is only ever OR-ed into here and is never cleared,
 * so a caller that reads output->rex afterwards needs to have
 * initialised it before the call.
 */
static ea *process_ea(operand * input, ea * output, int bits,
                      int addrbits, int rfield, opflags_t rflags)
{
    /*
     * Set when the operand carries OPFLAG_UNKNOWN.  Every place below
     * that would pick mod 0 or mod 1 requires !forw_ref, so such
     * operands get mod 2 unless EAF_BYTEOFFS forces mod 1.
     */
    bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);

    output->rip = false;

    /* REX flags for the rfield operand */
    output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);

    if (is_class(REGISTER, input->type)) {  /* register direct */
        int i;
        opflags_t f;

        if (!is_register(input->basereg))
            return NULL;
        f = regflag(input);
        i = nasm_regvals[input->basereg];

        if (REG_EA & ~f)
            return NULL;        /* Invalid EA register */

        output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);

        output->sib_present = false;             /* no SIB necessary */
        output->bytes = 0;  /* no offset necessary either */
        /* mod = 3 (0xC0), reg = rfield, rm = low three bits of the register */
        output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
    } else {                    /* it's a memory reference */
        if (input->basereg == -1
            && (input->indexreg == -1 || input->scale == 0)) {
            /* it's a pure offset */

            /*
             * An IP_REL operand with no segment in 64-bit mode is
             * demoted to a plain MEMORY operand, with a warning.
             */
            if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
                input->segment == NO_SEG) {
                nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
                input->type &= ~IP_REL;
                input->type |= MEMORY;
            }

            /*
             * A byte-offset request, or a word-offset request whose
             * disp_size differs from the address size's natural one,
             * cannot be honoured here: warn and carry on.
             */
            if (input->eaflags & EAF_BYTEOFFS ||
                (input->eaflags & EAF_WORDOFFS &&
                 input->disp_size != (addrbits != 16 ? 32 : 16))) {
                nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
            }

            if (bits == 64 && (~input->type & IP_REL)) {
              /*
               * 64-bit mode, not RIP-relative: emit rm=4 with a SIB byte
               * (index=4, base=5) and a 4-byte displacement.
               */
              int scale, index, base;
              output->sib_present = true;
              scale = 0;
              index = 4;
              base = 5;
              output->sib = (scale << 6) | (index << 3) | base;
              output->bytes = 4;
              output->modrm = 4 | ((rfield & 7) << 3);
              output->rip = false;
            } else {
              /*
               * No SIB: rm=6 with a 2-byte displacement for 16-bit
               * addressing, rm=5 with a 4-byte displacement otherwise.
               * In 64-bit mode this form is marked as RIP-relative.
               */
              output->sib_present = false;
              output->bytes = (addrbits != 16 ? 4 : 2);
              output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
              output->rip = bits == 64;
            }
        } else {                /* it's an indirection */
            int i = input->indexreg, b = input->basereg, s = input->scale;
            int32_t seg = input->segment;
            int hb = input->hintbase, ht = input->hinttype;
            int t, it, bt;              /* register numbers */
            opflags_t x, ix, bx;        /* register flags */

            if (s == 0)
                i = -1;         /* make this easy, at least */

            /* it/ix: value and flags of the index register, or -1/0 */
            if (is_register(i)) {
                it = nasm_regvals[i];
                ix = nasm_reg_flags[i];
            } else {
                it = -1;
                ix = 0;
            }

            /* bt/bx: value and flags of the base register, or -1/0 */
            if (is_register(b)) {
                bt = nasm_regvals[b];
                bx = nasm_reg_flags[b];
            } else {
                bt = -1;
                bx = 0;
            }

            /* check for a 32/64-bit memory reference... */
            if ((ix|bx) & (BITS32|BITS64)) {
                /* it must be a 32/64-bit memory reference. Firstly we have
                 * to check that all registers involved are type E/Rxx. */
                /*
                 * sok: address sizes still acceptable to the registers
                 * seen so far.  o: the offset, converted to int32_t.
                 */
                int32_t sok = BITS32|BITS64, o = input->offset;

                if (it != -1) {
                    if (!(REG64 & ~ix) || !(REG32 & ~ix))
                        sok &= ix;
                    else
                        return NULL;
                }

                if (bt != -1) {
                    if (REG_GPR & ~bx)
                        return NULL; /* Invalid register */
                    if (~sok & bx & SIZE_MASK)
                        return NULL; /* Invalid size */
                    sok &= bx;
                }

                /* While we're here, ensure the user didn't specify
                   WORD or QWORD. */
                if (input->disp_size == 16 || input->disp_size == 64)
                    return NULL;

                /* The register sizes must agree with the address size */
                if (addrbits == 16 ||
                    (addrbits == 32 && !(sok & BITS32)) ||
                    (addrbits == 64 && !(sok & BITS64)))
                    return NULL;

                /* now reorganize base/index */
                if (s == 1 && bt != it && bt != -1 && it != -1 &&
                    ((hb == b && ht == EAH_NOTBASE)
                     || (hb == i && ht == EAH_MAKEBASE))) {
                    /* swap if hints say so */
                    t = bt, bt = it, it = t;
                    x = bx, bx = ix, ix = x;
                }
                if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
                    bt = -1, bx = 0, s++;
                if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
                    /* make single reg base, unless hint */
                    bt = it, bx = ix, it = -1, ix = 0;
                }
                if (((s == 2 && it != REG_NUM_ESP
                      && !(input->eaflags & EAF_TIMESTWO)) || s == 3
                     || s == 5 || s == 9) && bt == -1)
                    bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
                /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
                if (it == -1 && (bt & 7) != REG_NUM_ESP
                    && (input->eaflags & EAF_TIMESTWO))
                    it = bt, ix = bx, bt = -1, bx = 0, s = 1;
                if (s == 1 && it == REG_NUM_ESP) {
                    /* swap ESP into base if scale is 1 */
                    t = it, it = bt, bt = t;
                    x = ix, ix = bx, bx = x;
                }
                if (it == REG_NUM_ESP
                    || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
                    return NULL;        /* wrong, for various reasons */

                /* REX.X comes from the index register, REX.B from the base */
                output->rex |= rexflags(it, ix, REX_X);
                output->rex |= rexflags(bt, bx, REX_B);

                if (it == -1 && (bt & 7) != REG_NUM_ESP) {
                    /* no SIB needed */
                    int mod, rm;

                    if (bt == -1) {
                        rm = 5;
                        mod = 0;
                    } else {
                        rm = (bt & 7);
                        /*
                         * mod 0: no displacement; mod 1: one byte;
                         * mod 2: the full-size displacement.
                         */
                        if (rm != REG_NUM_EBP && o == 0 &&
                                seg == NO_SEG && !forw_ref &&
                                !(input->eaflags &
                                  (EAF_BYTEOFFS | EAF_WORDOFFS)))
                            mod = 0;
                        else if (input->eaflags & EAF_BYTEOFFS ||
                                 (o >= -128 && o <= 127 && seg == NO_SEG
                                  && !forw_ref
                                  && !(input->eaflags & EAF_WORDOFFS)))
                            mod = 1;
                        else
                            mod = 2;
                    }

                    output->sib_present = false;
                    /* 4 displacement bytes without a base or for mod 2, else mod (0 or 1) */
                    output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
                    output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
                } else {
                    /* we need a SIB */
                    int mod, scale, index, base;

                    /* index field 4 is used when there is no index register */
                    if (it == -1)
                        index = 4, s = 1;
                    else
                        index = (it & 7);

                    /* scale factor 1/2/4/8 -> two-bit SIB scale field */
                    switch (s) {
                    case 1:
                        scale = 0;
                        break;
                    case 2:
                        scale = 1;
                        break;
                    case 4:
                        scale = 2;
                        break;
                    case 8:
                        scale = 3;
                        break;
                    default:   /* then what the smeg is it? */
                        return NULL;    /* panic */
                    }

                    if (bt == -1) {
                        base = 5;
                        mod = 0;
                    } else {
                        base = (bt & 7);
                        /* same mod selection as in the no-SIB case above */
                        if (base != REG_NUM_EBP && o == 0 &&
                                    seg == NO_SEG && !forw_ref &&
                                    !(input->eaflags &
                                      (EAF_BYTEOFFS | EAF_WORDOFFS)))
                            mod = 0;
                        else if (input->eaflags & EAF_BYTEOFFS ||
                                 (o >= -128 && o <= 127 && seg == NO_SEG
                                  && !forw_ref
                                  && !(input->eaflags & EAF_WORDOFFS)))
                            mod = 1;
                        else
                            mod = 2;
                    }

                    output->sib_present = true;
                    output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
                    /* rm = 4 in the ModRM byte announces the SIB byte */
                    output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
                    output->sib = (scale << 6) | (index << 3) | base;
                }
            } else {            /* it's 16-bit */
                int mod, rm;
                /* the offset, converted to int16_t */
                int16_t o = input->offset;

                /* check for 64-bit long mode */
                if (addrbits == 64)
                    return NULL;

                /* check all registers are BX, BP, SI or DI */
                if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
                     && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
                                       && i != R_SI && i != R_DI))
                    return NULL;

                /* ensure the user didn't specify DWORD/QWORD */
                if (input->disp_size == 32 || input->disp_size == 64)
                    return NULL;

                if (s != 1 && i != -1)
                    return NULL;        /* no can do, in 16-bit EA */
                if (b == -1 && i != -1) {
                    int tmp = b;
                    b = i;
                    i = tmp;
                }               /* swap */
                if ((b == R_SI || b == R_DI) && i != -1) {
                    int tmp = b;
                    b = i;
                    i = tmp;
                }
                /* have BX/BP as base, SI/DI index */
                if (b == i)
                    return NULL;        /* shouldn't ever happen, in theory */
                if (i != -1 && b != -1 &&
                    (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
                    return NULL;        /* invalid combinations */
                if (b == -1)    /* pure offset: handled above */
                    return NULL;        /* so if it gets to here, panic! */

                /*
                 * Map the register combination to the rm field: the
                 * first switch handles base+index pairs, the second
                 * (the else branch of "if (i != -1)") a single register.
                 */
                rm = -1;
                if (i != -1)
                    switch (i * 256 + b) {
                    case R_SI * 256 + R_BX:
                        rm = 0;
                        break;
                    case R_DI * 256 + R_BX:
                        rm = 1;
                        break;
                    case R_SI * 256 + R_BP:
                        rm = 2;
                        break;
                    case R_DI * 256 + R_BP:
                        rm = 3;
                        break;
                } else
                    switch (b) {
                    case R_SI:
                        rm = 4;
                        break;
                    case R_DI:
                        rm = 5;
                        break;
                    case R_BP:
                        rm = 6;
                        break;
                    case R_BX:
                        rm = 7;
                        break;
                    }
                if (rm == -1)   /* can't happen, in theory */
                    return NULL;        /* so panic if it does */

                /* rm 6 (BP alone) is excluded from mod 0 */
                if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
                    !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
                    mod = 0;
                else if (input->eaflags & EAF_BYTEOFFS ||
                         (o >= -128 && o <= 127 && seg == NO_SEG
                          && !forw_ref
                          && !(input->eaflags & EAF_WORDOFFS)))
                    mod = 1;
                else
                    mod = 2;

                output->sib_present = false;    /* no SIB - it's 16-bit */
                output->bytes = mod;    /* bytes of offset needed */
                output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
            }
        }
    }

    /* total encoded length: ModRM byte + optional SIB + displacement */
    output->size = 1 + output->sib_present + output->bytes;
    return output;
}
2524
2525 static void add_asp(insn *ins, int addrbits)
2526 {
2527     int j, valid;
2528     int defdisp;
2529
2530     valid = (addrbits == 64) ? 64|32 : 32|16;
2531
2532     switch (ins->prefixes[PPS_ASIZE]) {
2533     case P_A16:
2534         valid &= 16;
2535         break;
2536     case P_A32:
2537         valid &= 32;
2538         break;
2539     case P_A64:
2540         valid &= 64;
2541         break;
2542     case P_ASP:
2543         valid &= (addrbits == 32) ? 16 : 32;
2544         break;
2545     default:
2546         break;
2547     }
2548
2549     for (j = 0; j < ins->operands; j++) {
2550         if (is_class(MEMORY, ins->oprs[j].type)) {
2551             opflags_t i, b;
2552
2553             /* Verify as Register */
2554             if (!is_register(ins->oprs[j].indexreg))
2555                 i = 0;
2556             else
2557                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2558
2559             /* Verify as Register */
2560             if (!is_register(ins->oprs[j].basereg))
2561                 b = 0;
2562             else
2563                 b = nasm_reg_flags[ins->oprs[j].basereg];
2564
2565             if (ins->oprs[j].scale == 0)
2566                 i = 0;
2567
2568             if (!i && !b) {
2569                 int ds = ins->oprs[j].disp_size;
2570                 if ((addrbits != 64 && ds > 8) ||
2571                     (addrbits == 64 && ds == 16))
2572                     valid &= ds;
2573             } else {
2574                 if (!(REG16 & ~b))
2575                     valid &= 16;
2576                 if (!(REG32 & ~b))
2577                     valid &= 32;
2578                 if (!(REG64 & ~b))
2579                     valid &= 64;
2580
2581                 if (!(REG16 & ~i))
2582                     valid &= 16;
2583                 if (!(REG32 & ~i))
2584                     valid &= 32;
2585                 if (!(REG64 & ~i))
2586                     valid &= 64;
2587             }
2588         }
2589     }
2590
2591     if (valid & addrbits) {
2592         ins->addr_size = addrbits;
2593     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2594         /* Add an address size prefix */
2595         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2596         ins->prefixes[PPS_ASIZE] = pref;
2597         ins->addr_size = (addrbits == 32) ? 16 : 32;
2598     } else {
2599         /* Impossible... */
2600         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2601         ins->addr_size = addrbits; /* Error recovery */
2602     }
2603
2604     defdisp = ins->addr_size == 16 ? 16 : 32;
2605
2606     for (j = 0; j < ins->operands; j++) {
2607         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2608             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2609             != ins->addr_size) {
2610             /* mem_offs sizes must match the address size; if not,
2611                strip the MEM_OFFS bit and match only EA instructions */
2612             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2613         }
2614     }
2615 }