assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2010 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 wwl lpp
  96  *                 [l0]  ll = 0 for L = 0 (.128, .lz)
  97  *                 [l1]  ll = 1 for L = 1 (.256)
  98  *                 [lig] ll = 2 for L don't care (always assembled as 0)
  99  *
 100  *                 [w0]  ww = 0 for W = 0
 101  *                 [w1 ] ww = 1 for W = 1
 102  *                 [wig] ww = 2 for W don't care (always assembled as 0)
 103  *                 [ww]  ww = 3 for W used as REX.W
 104  *
 105  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 106  *
 107  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 108  *                 which is to be extended to the operand size.
 109  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 110  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 111  * \312          - (disassembler only) invalid with non-default address size.
 112  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 113  * \314          - (disassembler only) invalid with REX.B
 114  * \315          - (disassembler only) invalid with REX.X
 115  * \316          - (disassembler only) invalid with REX.R
 116  * \317          - (disassembler only) invalid with REX.W
 117  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 118  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 119  * \322          - indicates that this instruction is only valid when the
 120  *                 operand size is the default (instruction to disassembler,
 121  *                 generates no code in the assembler)
 122  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 123  * \324          - indicates 64-bit operand size requiring REX prefix.
 124  * \325          - instruction which always uses spl/bpl/sil/dil
 125  * \330          - a literal byte follows in the code stream, to be added
 126  *                 to the condition code value of the instruction.
 127  * \331          - instruction not valid with REP prefix.  Hint for
 128  *                 disassembler only; for SSE instructions.
 129  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 130  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 131  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 132  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  133  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  134  * \337          - force a REPNE prefix (0xF2) even if not specified.
 135  *                 \336-\337 are still listed as prefixes in the disassembler.
 136  * \340          - reserve <operand 0> bytes of uninitialized storage.
 137  *                 Operand 0 had better be a segmentless constant.
 138  * \341          - this instruction needs a WAIT "prefix"
 139  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 140  *                 (POP is never used for CS) depending on operand 0
 141  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 142  *                 on operand 0
 143  * \360          - no SSE prefix (== \364\331)
 144  * \361          - 66 SSE prefix (== \366\331)
 145  * \362          - F2 SSE prefix (== \364\332)
 146  * \363          - F3 SSE prefix (== \364\333)
 147  * \364          - operand-size prefix (0x66) not permitted
 148  * \365          - address-size prefix (0x67) not permitted
 149  * \366          - operand-size prefix (0x66) used as opcode extension
 150  * \367          - address-size prefix (0x67) used as opcode extension
 151  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 152  *                 370 is used for Jcc, 371 is used for JMP.
 153  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 154  *                 used for conditional jump over longer jump
 155  */
 156
 157 #include "compiler.h"
 158
 159 #include <stdio.h>
 160 #include <string.h>
 161 #include <inttypes.h>
 162
 163 #include "nasm.h"
 164 #include "nasmlib.h"
 165 #include "assemble.h"
 166 #include "insns.h"
 167 #include "tables.h"
 168
 169 enum match_result {
 170     /*
 171      * Matching errors.  These should be sorted so that more specific
 172      * errors come later in the sequence.
 173      */
 174     MERR_INVALOP,
 175     MERR_OPSIZEMISSING,
 176     MERR_OPSIZEMISMATCH,
 177     MERR_BADCPU,
 178     MERR_BADMODE,
 179     /*
 180      * Matching success; the conditional ones first
 181      */
 182     MOK_JUMP,   /* Matching OK but needs jmp_match() */
 183     MOK_GOOD    /* Matching unconditionally OK */
 184 };
 185
 186 typedef struct {
 187     int sib_present;                 /* is a SIB byte necessary? */
 188     int bytes;                       /* # of bytes of offset needed */
 189     int size;                        /* lazy - this is sib+bytes+1 */
 190     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 191 } ea;
 192
 193 static uint32_t cpu;            /* cpu level received from nasm.c */
 194 static efunc errfunc;
 195 static struct ofmt *outfmt;
 196 static ListGen *list;
 197
 198 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 199 static void gencode(int32_t segment, int64_t offset, int bits,
 200                     insn * ins, const struct itemplate *temp,
 201                     int64_t insn_end);
 202 static enum match_result find_match(const struct itemplate **tempp,
 203                                     insn *instruction,
 204                                     int32_t segment, int64_t offset, int bits);
 205 static enum match_result matches(const struct itemplate *, insn *, int bits);
 206 static opflags_t regflag(const operand *);
 207 static int32_t regval(const operand *);
 208 static int rexflags(int, opflags_t, int);
 209 static int op_rexflags(const operand *, int);
 210 static ea *process_ea(operand *, ea *, int, int, int, opflags_t);
 211 static void add_asp(insn *, int);
 212
 213 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 214 {
 215     return ins->prefixes[pos] == prefix;
 216 }
 217
 218 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 219 {
 220     if (ins->prefixes[pos])
 221         errfunc(ERR_NONFATAL, "invalid %s prefix",
 222                 prefix_name(ins->prefixes[pos]));
 223 }
 224
 225 static const char *size_name(int size)
 226 {
 227     switch (size) {
 228     case 1:
 229         return "byte";
 230     case 2:
 231         return "word";
 232     case 4:
 233         return "dword";
 234     case 8:
 235         return "qword";
 236     case 10:
 237         return "tword";
 238     case 16:
 239         return "oword";
 240     case 32:
 241         return "yword";
 242     default:
 243         return "???";
 244     }
 245 }
 246
 247 static void warn_overflow(int pass, int size)
 248 {
 249     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 250             "%s data exceeds bounds", size_name(size));
 251 }
 252
 253 static void warn_overflow_const(int64_t data, int size)
 254 {
 255     if (overflow_general(data, size))
 256         warn_overflow(ERR_PASS1, size);
 257 }
 258
 259 static void warn_overflow_opd(const struct operand *o, int size)
 260 {
 261     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 262         if (overflow_general(o->offset, size))
 263             warn_overflow(ERR_PASS2, size);
 264     }
 265 }
 266
 267 /*
 268  * This routine wrappers the real output format's output routine,
 269  * in order to pass a copy of the data off to the listing file
 270  * generator at the same time.
 271  */
 272 static void out(int64_t offset, int32_t segto, const void *data,
 273                 enum out_type type, uint64_t size,
 274                 int32_t segment, int32_t wrt)
 275 {
 276     static int32_t lineno = 0;     /* static!!! */
 277     static char *lnfname = NULL;
 278     uint8_t p[8];
 279
 280     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 281         /*
 282          * This is a non-relocated address, and we're going to
 283          * convert it into RAWDATA format.
 284          */
 285         uint8_t *q = p;
 286
 287         if (size > 8) {
 288             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 289             return;
 290         }
 291
 292         WRITEADDR(q, *(int64_t *)data, size);
 293         data = p;
 294         type = OUT_RAWDATA;
 295     }
 296
 297     list->output(offset, data, type, size);
 298
 299     /*
 300      * this call to src_get determines when we call the
 301      * debug-format-specific "linenum" function
 302      * it updates lineno and lnfname to the current values
 303      * returning 0 if "same as last time", -2 if lnfname
 304      * changed, and the amount by which lineno changed,
 305      * if it did. thus, these variables must be static
 306      */
 307
 308     if (src_get(&lineno, &lnfname))
 309         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 310
 311     outfmt->output(segto, data, type, size, segment, wrt);
 312 }
 313
 314 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 315                      insn * ins, const uint8_t *code)
 316 {
 317     int64_t isize;
 318     uint8_t c = code[0];
 319
 320     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 321         return false;
 322     if (!optimizing)
 323         return false;
 324     if (optimizing < 0 && c == 0371)
 325         return false;
 326
 327     isize = calcsize(segment, offset, bits, ins, code);
 328
 329     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 330         /* Be optimistic in pass 1 */
 331         return true;
 332
 333     if (ins->oprs[0].segment != segment)
 334         return false;
 335
 336     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 337     return (isize >= -128 && isize <= 127); /* is it byte size? */
 338 }
 339
 340 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 341                  insn * instruction, struct ofmt *output, efunc error,
 342                  ListGen * listgen)
 343 {
 344     const struct itemplate *temp;
 345     int j;
 346     enum match_result m;
 347     int64_t insn_end;
 348     int32_t itimes;
 349     int64_t start = offset;
 350     int64_t wsize;              /* size for DB etc. */
 351
 352     errfunc = error;            /* to pass to other functions */
 353     cpu = cp;
 354     outfmt = output;            /* likewise */
 355     list = listgen;             /* and again */
 356
 357     wsize = idata_bytes(instruction->opcode);
 358     if (wsize == -1)
 359         return 0;
 360
 361     if (wsize) {
 362         extop *e;
 363         int32_t t = instruction->times;
 364         if (t < 0)
 365             errfunc(ERR_PANIC,
 366                     "instruction->times < 0 (%ld) in assemble()", t);
 367
 368         while (t--) {           /* repeat TIMES times */
 369             list_for_each(e, instruction->eops) {
 370                 if (e->type == EOT_DB_NUMBER) {
 371                     if (wsize > 8) {
 372                         errfunc(ERR_NONFATAL,
 373                                 "integer supplied to a DT, DO or DY"
 374                                 " instruction");
 375                     } else {
 376                         out(offset, segment, &e->offset,
 377                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 378                         offset += wsize;
 379                     }
 380                 } else if (e->type == EOT_DB_STRING ||
 381                            e->type == EOT_DB_STRING_FREE) {
 382                     int align;
 383
 384                     out(offset, segment, e->stringval,
 385                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 386                     align = e->stringlen % wsize;
 387
 388                     if (align) {
 389                         align = wsize - align;
 390                         out(offset, segment, zero_buffer,
 391                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 392                     }
 393                     offset += e->stringlen + align;
 394                 }
 395             }
 396             if (t > 0 && t == instruction->times - 1) {
 397                 /*
 398                  * Dummy call to list->output to give the offset to the
 399                  * listing module.
 400                  */
 401                 list->output(offset, NULL, OUT_RAWDATA, 0);
 402                 list->uplevel(LIST_TIMES);
 403             }
 404         }
 405         if (instruction->times > 1)
 406             list->downlevel(LIST_TIMES);
 407         return offset - start;
 408     }
 409
 410     if (instruction->opcode == I_INCBIN) {
 411         const char *fname = instruction->eops->stringval;
 412         FILE *fp;
 413
 414         fp = fopen(fname, "rb");
 415         if (!fp) {
 416             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 417                   fname);
 418         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 419             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 420                   fname);
 421         } else {
 422             static char buf[4096];
 423             size_t t = instruction->times;
 424             size_t base = 0;
 425             size_t len;
 426
 427             len = ftell(fp);
 428             if (instruction->eops->next) {
 429                 base = instruction->eops->next->offset;
 430                 len -= base;
 431                 if (instruction->eops->next->next &&
 432                     len > (size_t)instruction->eops->next->next->offset)
 433                     len = (size_t)instruction->eops->next->next->offset;
 434             }
 435             /*
 436              * Dummy call to list->output to give the offset to the
 437              * listing module.
 438              */
 439             list->output(offset, NULL, OUT_RAWDATA, 0);
 440             list->uplevel(LIST_INCBIN);
 441             while (t--) {
 442                 size_t l;
 443
 444                 fseek(fp, base, SEEK_SET);
 445                 l = len;
 446                 while (l > 0) {
 447                     int32_t m;
 448                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 449                     if (!m) {
 450                         /*
 451                          * This shouldn't happen unless the file
 452                          * actually changes while we are reading
 453                          * it.
 454                          */
 455                         error(ERR_NONFATAL,
 456                               "`incbin': unexpected EOF while"
 457                               " reading file `%s'", fname);
 458                         t = 0;  /* Try to exit cleanly */
 459                         break;
 460                     }
 461                     out(offset, segment, buf, OUT_RAWDATA, m,
 462                         NO_SEG, NO_SEG);
 463                     l -= m;
 464                 }
 465             }
 466             list->downlevel(LIST_INCBIN);
 467             if (instruction->times > 1) {
 468                 /*
 469                  * Dummy call to list->output to give the offset to the
 470                  * listing module.
 471                  */
 472                 list->output(offset, NULL, OUT_RAWDATA, 0);
 473                 list->uplevel(LIST_TIMES);
 474                 list->downlevel(LIST_TIMES);
 475             }
 476             fclose(fp);
 477             return instruction->times * len;
 478         }
 479         return 0;               /* if we're here, there's an error */
 480     }
 481
 482     /* Check to see if we need an address-size prefix */
 483     add_asp(instruction, bits);
 484
 485     m = find_match(&temp, instruction, segment, offset, bits);
 486
 487     if (m == MOK_GOOD) {
 488         /* Matches! */
 489         int64_t insn_size = calcsize(segment, offset, bits,
 490                                      instruction, temp->code);
 491         itimes = instruction->times;
 492         if (insn_size < 0)  /* shouldn't be, on pass two */
 493             error(ERR_PANIC, "errors made it through from pass one");
 494         else
 495             while (itimes--) {
 496                 for (j = 0; j < MAXPREFIX; j++) {
 497                     uint8_t c = 0;
 498                     switch (instruction->prefixes[j]) {
 499                     case P_WAIT:
 500                         c = 0x9B;
 501                         break;
 502                     case P_LOCK:
 503                         c = 0xF0;
 504                         break;
 505                     case P_REPNE:
 506                     case P_REPNZ:
 507                         c = 0xF2;
 508                         break;
 509                     case P_REPE:
 510                     case P_REPZ:
 511                     case P_REP:
 512                         c = 0xF3;
 513                         break;
 514                     case R_CS:
 515                         if (bits == 64) {
 516                             error(ERR_WARNING | ERR_PASS2,
 517                                   "cs segment base generated, but will be ignored in 64-bit mode");
 518                         }
 519                         c = 0x2E;
 520                         break;
 521                     case R_DS:
 522                         if (bits == 64) {
 523                             error(ERR_WARNING | ERR_PASS2,
 524                                   "ds segment base generated, but will be ignored in 64-bit mode");
 525                         }
 526                         c = 0x3E;
 527                         break;
 528                     case R_ES:
 529                         if (bits == 64) {
 530                             error(ERR_WARNING | ERR_PASS2,
 531                                   "es segment base generated, but will be ignored in 64-bit mode");
 532                         }
 533                         c = 0x26;
 534                         break;
 535                     case R_FS:
 536                         c = 0x64;
 537                         break;
 538                     case R_GS:
 539                         c = 0x65;
 540                         break;
 541                     case R_SS:
 542                         if (bits == 64) {
 543                             error(ERR_WARNING | ERR_PASS2,
 544                                   "ss segment base generated, but will be ignored in 64-bit mode");
 545                         }
 546                         c = 0x36;
 547                         break;
 548                     case R_SEGR6:
 549                     case R_SEGR7:
 550                         error(ERR_NONFATAL,
 551                               "segr6 and segr7 cannot be used as prefixes");
 552                         break;
 553                     case P_A16:
 554                         if (bits == 64) {
 555                             error(ERR_NONFATAL,
 556                                   "16-bit addressing is not supported "
 557                                   "in 64-bit mode");
 558                         } else if (bits != 16)
 559                             c = 0x67;
 560                         break;
 561                     case P_A32:
 562                         if (bits != 32)
 563                             c = 0x67;
 564                         break;
 565                     case P_A64:
 566                         if (bits != 64) {
 567                             error(ERR_NONFATAL,
 568                                   "64-bit addressing is only supported "
 569                                   "in 64-bit mode");
 570                         }
 571                         break;
 572                     case P_ASP:
 573                         c = 0x67;
 574                         break;
 575                     case P_O16:
 576                         if (bits != 16)
 577                             c = 0x66;
 578                         break;
 579                     case P_O32:
 580                         if (bits == 16)
 581                             c = 0x66;
 582                         break;
 583                     case P_O64:
 584                         /* REX.W */
 585                         break;
 586                     case P_OSP:
 587                         c = 0x66;
 588                         break;
 589                     case P_none:
 590                         break;
 591                     default:
 592                         error(ERR_PANIC, "invalid instruction prefix");
 593                     }
 594                     if (c != 0) {
 595                         out(offset, segment, &c, OUT_RAWDATA, 1,
 596                             NO_SEG, NO_SEG);
 597                         offset++;
 598                     }
 599                 }
 600                 insn_end = offset + insn_size;
 601                 gencode(segment, offset, bits, instruction,
 602                         temp, insn_end);
 603                 offset += insn_size;
 604                 if (itimes > 0 && itimes == instruction->times - 1) {
 605                     /*
 606                      * Dummy call to list->output to give the offset to the
 607                      * listing module.
 608                      */
 609                     list->output(offset, NULL, OUT_RAWDATA, 0);
 610                     list->uplevel(LIST_TIMES);
 611                 }
 612             }
 613         if (instruction->times > 1)
 614             list->downlevel(LIST_TIMES);
 615         return offset - start;
 616     } else {
 617         /* No match */
 618         switch (m) {
 619         case MERR_OPSIZEMISSING:
 620             error(ERR_NONFATAL, "operation size not specified");
 621             break;
 622         case MERR_OPSIZEMISMATCH:
 623             error(ERR_NONFATAL, "mismatch in operand sizes");
 624             break;
 625         case MERR_BADCPU:
 626             error(ERR_NONFATAL, "no instruction for this cpu level");
 627             break;
 628         case MERR_BADMODE:
 629             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 630                   bits);
 631             break;
 632         default:
 633             error(ERR_NONFATAL,
 634                   "invalid combination of opcode and operands");
 635             break;
 636         }
 637     }
 638     return 0;
 639 }
 640
 641 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 642                   insn * instruction, efunc error)
 643 {
 644     const struct itemplate *temp;
 645     enum match_result m;
 646
 647     errfunc = error;            /* to pass to other functions */
 648     cpu = cp;
 649
 650     if (instruction->opcode == I_none)
 651         return 0;
 652
 653     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 654         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 655         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 656         instruction->opcode == I_DY) {
 657         extop *e;
 658         int32_t isize, osize, wsize;
 659
 660         isize = 0;
 661         wsize = idata_bytes(instruction->opcode);
 662
 663         list_for_each(e, instruction->eops) {
 664             int32_t align;
 665
 666             osize = 0;
 667             if (e->type == EOT_DB_NUMBER) {
 668                 osize = 1;
 669                 warn_overflow_const(e->offset, wsize);
 670             } else if (e->type == EOT_DB_STRING ||
 671                        e->type == EOT_DB_STRING_FREE)
 672                 osize = e->stringlen;
 673
 674             align = (-osize) % wsize;
 675             if (align < 0)
 676                 align += wsize;
 677             isize += osize + align;
 678         }
 679         return isize * instruction->times;
 680     }
 681
 682     if (instruction->opcode == I_INCBIN) {
 683         const char *fname = instruction->eops->stringval;
 684         FILE *fp;
 685         int64_t val = 0;
 686         size_t len;
 687
 688         fp = fopen(fname, "rb");
 689         if (!fp)
 690             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 691                   fname);
 692         else if (fseek(fp, 0L, SEEK_END) < 0)
 693             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 694                   fname);
 695         else {
 696             len = ftell(fp);
 697             if (instruction->eops->next) {
 698                 len -= instruction->eops->next->offset;
 699                 if (instruction->eops->next->next &&
 700                     len > (size_t)instruction->eops->next->next->offset) {
 701                     len = (size_t)instruction->eops->next->next->offset;
 702                 }
 703             }
 704             val = instruction->times * len;
 705         }
 706         if (fp)
 707             fclose(fp);
 708         return val;
 709     }
 710
 711     /* Check to see if we need an address-size prefix */
 712     add_asp(instruction, bits);
 713
 714     m = find_match(&temp, instruction, segment, offset, bits);
 715     if (m == MOK_GOOD) {
 716         /* we've matched an instruction. */
 717         int64_t isize;
 718         const uint8_t *codes = temp->code;
 719         int j;
 720
 721         isize = calcsize(segment, offset, bits, instruction, codes);
 722         if (isize < 0)
 723             return -1;
 724         for (j = 0; j < MAXPREFIX; j++) {
 725             switch (instruction->prefixes[j]) {
 726             case P_A16:
 727                 if (bits != 16)
 728                     isize++;
 729                 break;
 730             case P_A32:
 731                 if (bits != 32)
 732                     isize++;
 733                 break;
 734             case P_O16:
 735                 if (bits != 16)
 736                     isize++;
 737                 break;
 738             case P_O32:
 739                 if (bits == 16)
 740                     isize++;
 741                 break;
 742             case P_A64:
 743             case P_O64:
 744             case P_none:
 745                 break;
 746             default:
 747                 isize++;
 748                 break;
 749             }
 750         }
 751         return isize * instruction->times;
 752     } else {
 753         return -1;                  /* didn't match any instruction */
 754     }
 755 }
 756
 757 static bool possible_sbyte(operand *o)
 758 {
 759     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 760         !(o->opflags & OPFLAG_UNKNOWN) &&
 761         optimizing >= 0 && !(o->type & STRICT);
 762 }
 763
 764 /* check that opn[op]  is a signed byte of size 16 or 32 */
 765 static bool is_sbyte16(operand *o)
 766 {
 767     int16_t v;
 768
 769     if (!possible_sbyte(o))
 770         return false;
 771
 772     v = o->offset;
 773     return v >= -128 && v <= 127;
 774 }
 775
 776 static bool is_sbyte32(operand *o)
 777 {
 778     int32_t v;
 779
 780     if (!possible_sbyte(o))
 781         return false;
 782
 783     v = o->offset;
 784     return v >= -128 && v <= 127;
 785 }
 786
 787 /* Common construct */
 788 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 789
 790 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 791                         insn * ins, const uint8_t *codes)
 792 {
 793     int64_t length = 0;
 794     uint8_t c;
 795     int rex_mask = ~0;
 796     int op1, op2;
 797     struct operand *opx;
 798     uint8_t opex = 0;
 799
 800     ins->rex = 0;               /* Ensure REX is reset */
 801
 802     if (ins->prefixes[PPS_OSIZE] == P_O64)
 803         ins->rex |= REX_W;
 804
 805     (void)segment;              /* Don't warn that this parameter is unused */
 806     (void)offset;               /* Don't warn that this parameter is unused */
 807
 808     while (*codes) {
 809         c = *codes++;
 810         op1 = (c & 3) + ((opex & 1) << 2);
 811         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 812         opx = &ins->oprs[op1];
 813         opex = 0;               /* For the next iteration */
 814
 815         switch (c) {
 816         case 01:
 817         case 02:
 818         case 03:
 819         case 04:
 820             codes += c, length += c;
 821             break;
 822
 823         case 05:
 824         case 06:
 825         case 07:
 826             opex = c;
 827             break;
 828
 829         case4(010):
 830             ins->rex |=
 831                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 832             codes++, length++;
 833             break;
 834
 835         case4(014):
 836         case4(020):
 837         case4(024):
 838             length++;
 839             break;
 840
 841         case4(030):
 842             length += 2;
 843             break;
 844
 845         case4(034):
 846             if (opx->type & (BITS16 | BITS32 | BITS64))
 847                 length += (opx->type & BITS16) ? 2 : 4;
 848             else
 849                 length += (bits == 16) ? 2 : 4;
 850             break;
 851
 852         case4(040):
 853             length += 4;
 854             break;
 855
 856         case4(044):
 857             length += ins->addr_size >> 3;
 858             break;
 859
 860         case4(050):
 861             length++;
 862             break;
 863
 864         case4(054):
 865             length += 8; /* MOV reg64/imm */
 866             break;
 867
 868         case4(060):
 869             length += 2;
 870             break;
 871
 872         case4(064):
 873             if (opx->type & (BITS16 | BITS32 | BITS64))
 874                 length += (opx->type & BITS16) ? 2 : 4;
 875             else
 876                 length += (bits == 16) ? 2 : 4;
 877             break;
 878
 879         case4(070):
 880             length += 4;
 881             break;
 882
 883         case4(074):
 884             length += 2;
 885             break;
 886
 887         case4(0140):
 888             length += is_sbyte16(opx) ? 1 : 2;
 889             break;
 890
 891         case4(0144):
 892             codes++;
 893             length++;
 894             break;
 895
 896         case4(0150):
 897             length += is_sbyte32(opx) ? 1 : 4;
 898             break;
 899
 900         case4(0154):
 901             codes++;
 902             length++;
 903             break;
 904
 905         case4(0160):
 906             length++;
 907             ins->rex |= REX_D;
 908             ins->drexdst = regval(opx);
 909             break;
 910
 911         case4(0164):
 912             length++;
 913             ins->rex |= REX_D|REX_OC;
 914             ins->drexdst = regval(opx);
 915             break;
 916
 917         case 0171:
 918             break;
 919
 920         case 0172:
 921         case 0173:
 922         case 0174:
 923             codes++;
 924             length++;
 925             break;
 926
 927         case4(0250):
 928             length += is_sbyte32(opx) ? 1 : 4;
 929             break;
 930
 931         case4(0254):
 932             length += 4;
 933             break;
 934
 935         case4(0260):
 936             ins->rex |= REX_V;
 937             ins->drexdst = regval(opx);
 938             ins->vex_cm = *codes++;
 939             ins->vex_wlp = *codes++;
 940             break;
 941
 942         case 0270:
 943             ins->rex |= REX_V;
 944             ins->drexdst = 0;
 945             ins->vex_cm = *codes++;
 946             ins->vex_wlp = *codes++;
 947             break;
 948
 949         case4(0274):
 950             length++;
 951             break;
 952
 953         case4(0300):
 954             break;
 955
 956         case 0310:
 957             if (bits == 64)
 958                 return -1;
 959             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 960             break;
 961
 962         case 0311:
 963             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 964             break;
 965
 966         case 0312:
 967             break;
 968
 969         case 0313:
 970             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 971                 has_prefix(ins, PPS_ASIZE, P_A32))
 972                 return -1;
 973             break;
 974
 975         case4(0314):
 976             break;
 977
 978         case 0320:
 979             length += (bits != 16);
 980             break;
 981
 982         case 0321:
 983             length += (bits == 16);
 984             break;
 985
 986         case 0322:
 987             break;
 988
 989         case 0323:
 990             rex_mask &= ~REX_W;
 991             break;
 992
 993         case 0324:
 994             ins->rex |= REX_W;
 995             break;
 996
 997         case 0325:
 998             ins->rex |= REX_NH;
 999             break;
1000
1001         case 0330:
1002             codes++, length++;
1003             break;
1004
1005         case 0331:
1006             break;
1007
1008         case 0332:
1009         case 0333:
1010             length++;
1011             break;
1012
1013         case 0334:
1014             ins->rex |= REX_L;
1015             break;
1016
1017         case 0335:
1018             break;
1019
1020         case 0336:
1021             if (!ins->prefixes[PPS_LREP])
1022                 ins->prefixes[PPS_LREP] = P_REP;
1023             break;
1024
1025         case 0337:
1026             if (!ins->prefixes[PPS_LREP])
1027                 ins->prefixes[PPS_LREP] = P_REPNE;
1028             break;
1029
1030         case 0340:
1031             if (ins->oprs[0].segment != NO_SEG)
1032                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1033                         " quantity of BSS space");
1034             else
1035                 length += ins->oprs[0].offset;
1036             break;
1037
1038         case 0341:
1039             if (!ins->prefixes[PPS_WAIT])
1040                 ins->prefixes[PPS_WAIT] = P_WAIT;
1041             break;
1042
1043         case4(0344):
1044             length++;
1045             break;
1046
1047         case 0360:
1048             break;
1049
1050         case 0361:
1051         case 0362:
1052         case 0363:
1053             length++;
1054             break;
1055
1056         case 0364:
1057         case 0365:
1058             break;
1059
1060         case 0366:
1061         case 0367:
1062             length++;
1063             break;
1064
1065         case 0370:
1066         case 0371:
1067         case 0372:
1068             break;
1069
1070         case 0373:
1071             length++;
1072             break;
1073
1074         case4(0100):
1075         case4(0110):
1076         case4(0120):
1077         case4(0130):
1078         case4(0200):
1079         case4(0204):
1080         case4(0210):
1081         case4(0214):
1082         case4(0220):
1083         case4(0224):
1084         case4(0230):
1085         case4(0234):
1086             {
1087                 ea ea_data;
1088                 int rfield;
1089                 opflags_t rflags;
1090                 struct operand *opy = &ins->oprs[op2];
1091
1092                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1093
1094                 if (c <= 0177) {
1095                     /* pick rfield from operand b (opx) */
1096                     rflags = regflag(opx);
1097                     rfield = nasm_regvals[opx->basereg];
1098                 } else {
1099                     rflags = 0;
1100                     rfield = c & 7;
1101                 }
1102                 if (!process_ea(opy, &ea_data, bits,
1103                                 ins->addr_size, rfield, rflags)) {
1104                     errfunc(ERR_NONFATAL, "invalid effective address");
1105                     return -1;
1106                 } else {
1107                     ins->rex |= ea_data.rex;
1108                     length += ea_data.size;
1109                 }
1110             }
1111             break;
1112
1113         default:
1114             errfunc(ERR_PANIC, "internal instruction table corrupt"
1115                     ": instruction code \\%o (0x%02X) given", c, c);
1116             break;
1117         }
1118     }
1119
1120     ins->rex &= rex_mask;
1121
1122     if (ins->rex & REX_NH) {
1123         if (ins->rex & REX_H) {
1124             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1125             return -1;
1126         }
1127         ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
1128     }
1129
1130     if (ins->rex & REX_V) {
1131         int bad32 = REX_R|REX_W|REX_X|REX_B;
1132
1133         if (ins->rex & REX_H) {
1134             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1135             return -1;
1136         }
1137         switch (ins->vex_wlp & 060) {
1138         case 000:
1139         case 040:
1140             ins->rex &= ~REX_W;
1141             break;
1142         case 020:
1143             ins->rex |= REX_W;
1144             bad32 &= ~REX_W;
1145             break;
1146         case 060:
1147             /* Follow REX_W */
1148             break;
1149         }
1150
1151         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1152             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1153             return -1;
1154         }
1155         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1156             length += 3;
1157         else
1158             length += 2;
1159     } else if (ins->rex & REX_D) {
1160         if (ins->rex & REX_H) {
1161             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1162             return -1;
1163         }
1164         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1165                            ins->drexdst > 7)) {
1166             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1167             return -1;
1168         }
1169         length++;
1170     } else if (ins->rex & REX_REAL) {
1171         if (ins->rex & REX_H) {
1172             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1173             return -1;
1174         } else if (bits == 64) {
1175             length++;
1176         } else if ((ins->rex & REX_L) &&
1177                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1178                    cpu >= IF_X86_64) {
1179             /* LOCK-as-REX.R */
1180             assert_no_prefix(ins, PPS_LREP);
1181             length++;
1182         } else {
1183             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1184             return -1;
1185         }
1186     }
1187
1188     return length;
1189 }
1190
/*
 * Emit the REX prefix byte if one is due and has not been written yet.
 *
 * Expects `ins', `bits', `offset' and `segment' to be in scope at the
 * point of use.  A byte is written only when bits == 64, at least one
 * REX_REAL bit is set and neither REX_D nor REX_V is flagged.  After
 * writing, ins->rex is cleared so that a later EMIT_REX() for the same
 * instruction does nothing, and `offset' is advanced by one.
 *
 * The body is wrapped in do { } while (0) so that `EMIT_REX();' is a
 * single statement and can safely form the body of an unbraced if/else.
 */
#define EMIT_REX()                                                              \
    do {                                                                        \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&             \
            (bits == 64)) {                                                     \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                             \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG);    \
            ins->rex = 0;                                                       \
            offset += 1;                                                        \
        }                                                                       \
    } while (0)
1198
1199 static void gencode(int32_t segment, int64_t offset, int bits,
1200                     insn * ins, const struct itemplate *temp,
1201                     int64_t insn_end)
1202 {
1203     static char condval[] = {   /* conditional opcodes */
1204         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1205         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1206         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1207     };
1208     uint8_t c;
1209     uint8_t bytes[4];
1210     int64_t size;
1211     int64_t data;
1212     int op1, op2;
1213     struct operand *opx;
1214     const uint8_t *codes = temp->code;
1215     uint8_t opex = 0;
1216
1217     while (*codes) {
1218         c = *codes++;
1219         op1 = (c & 3) + ((opex & 1) << 2);
1220         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1221         opx = &ins->oprs[op1];
1222         opex = 0;                /* For the next iteration */
1223
1224         switch (c) {
1225         case 01:
1226         case 02:
1227         case 03:
1228         case 04:
1229             EMIT_REX();
1230             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1231             codes += c;
1232             offset += c;
1233             break;
1234
1235         case 05:
1236         case 06:
1237         case 07:
1238             opex = c;
1239             break;
1240
1241         case4(010):
1242             EMIT_REX();
1243             bytes[0] = *codes++ + (regval(opx) & 7);
1244             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1245             offset += 1;
1246             break;
1247
1248         case4(014):
1249             /*
1250              * The test for BITS8 and SBYTE here is intended to avoid
1251              * warning on optimizer actions due to SBYTE, while still
1252              * warn on explicit BYTE directives.  Also warn, obviously,
1253              * if the optimizer isn't enabled.
1254              */
1255             if (((opx->type & BITS8) ||
1256                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1257                 (opx->offset < -128 || opx->offset > 127)) {
1258                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1259                         "signed byte value exceeds bounds");
1260             }
1261             if (opx->segment != NO_SEG) {
1262                 data = opx->offset;
1263                 out(offset, segment, &data, OUT_ADDRESS, 1,
1264                     opx->segment, opx->wrt);
1265             } else {
1266                 bytes[0] = opx->offset;
1267                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1268                     NO_SEG);
1269             }
1270             offset += 1;
1271             break;
1272
1273         case4(020):
1274             if (opx->offset < -256 || opx->offset > 255) {
1275                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1276                         "byte value exceeds bounds");
1277             }
1278             if (opx->segment != NO_SEG) {
1279                 data = opx->offset;
1280                 out(offset, segment, &data, OUT_ADDRESS, 1,
1281                     opx->segment, opx->wrt);
1282             } else {
1283                 bytes[0] = opx->offset;
1284                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1285                     NO_SEG);
1286             }
1287             offset += 1;
1288             break;
1289
1290         case4(024):
1291             if (opx->offset < 0 || opx->offset > 255)
1292                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1293                         "unsigned byte value exceeds bounds");
1294             if (opx->segment != NO_SEG) {
1295                 data = opx->offset;
1296                 out(offset, segment, &data, OUT_ADDRESS, 1,
1297                     opx->segment, opx->wrt);
1298             } else {
1299                 bytes[0] = opx->offset;
1300                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1301                     NO_SEG);
1302             }
1303             offset += 1;
1304             break;
1305
1306         case4(030):
1307             warn_overflow_opd(opx, 2);
1308             data = opx->offset;
1309             out(offset, segment, &data, OUT_ADDRESS, 2,
1310                 opx->segment, opx->wrt);
1311             offset += 2;
1312             break;
1313
1314         case4(034):
1315             if (opx->type & (BITS16 | BITS32))
1316                 size = (opx->type & BITS16) ? 2 : 4;
1317             else
1318                 size = (bits == 16) ? 2 : 4;
1319             warn_overflow_opd(opx, size);
1320             data = opx->offset;
1321             out(offset, segment, &data, OUT_ADDRESS, size,
1322                 opx->segment, opx->wrt);
1323             offset += size;
1324             break;
1325
1326         case4(040):
1327             warn_overflow_opd(opx, 4);
1328             data = opx->offset;
1329             out(offset, segment, &data, OUT_ADDRESS, 4,
1330                 opx->segment, opx->wrt);
1331             offset += 4;
1332             break;
1333
1334         case4(044):
1335             data = opx->offset;
1336             size = ins->addr_size >> 3;
1337             warn_overflow_opd(opx, size);
1338             out(offset, segment, &data, OUT_ADDRESS, size,
1339                 opx->segment, opx->wrt);
1340             offset += size;
1341             break;
1342
1343         case4(050):
1344             if (opx->segment != segment) {
1345                 data = opx->offset;
1346                 out(offset, segment, &data,
1347                     OUT_REL1ADR, insn_end - offset,
1348                     opx->segment, opx->wrt);
1349             } else {
1350                 data = opx->offset - insn_end;
1351                 if (data > 127 || data < -128)
1352                     errfunc(ERR_NONFATAL, "short jump is out of range");
1353                 out(offset, segment, &data,
1354                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1355             }
1356             offset += 1;
1357             break;
1358
1359         case4(054):
1360             data = (int64_t)opx->offset;
1361             out(offset, segment, &data, OUT_ADDRESS, 8,
1362                 opx->segment, opx->wrt);
1363             offset += 8;
1364             break;
1365
1366         case4(060):
1367             if (opx->segment != segment) {
1368                 data = opx->offset;
1369                 out(offset, segment, &data,
1370                     OUT_REL2ADR, insn_end - offset,
1371                     opx->segment, opx->wrt);
1372             } else {
1373                 data = opx->offset - insn_end;
1374                 out(offset, segment, &data,
1375                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1376             }
1377             offset += 2;
1378             break;
1379
1380         case4(064):
1381             if (opx->type & (BITS16 | BITS32 | BITS64))
1382                 size = (opx->type & BITS16) ? 2 : 4;
1383             else
1384                 size = (bits == 16) ? 2 : 4;
1385             if (opx->segment != segment) {
1386                 data = opx->offset;
1387                 out(offset, segment, &data,
1388                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1389                     insn_end - offset, opx->segment, opx->wrt);
1390             } else {
1391                 data = opx->offset - insn_end;
1392                 out(offset, segment, &data,
1393                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1394             }
1395             offset += size;
1396             break;
1397
1398         case4(070):
1399             if (opx->segment != segment) {
1400                 data = opx->offset;
1401                 out(offset, segment, &data,
1402                     OUT_REL4ADR, insn_end - offset,
1403                     opx->segment, opx->wrt);
1404             } else {
1405                 data = opx->offset - insn_end;
1406                 out(offset, segment, &data,
1407                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1408             }
1409             offset += 4;
1410             break;
1411
1412         case4(074):
1413             if (opx->segment == NO_SEG)
1414                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1415                         " relocatable");
1416             data = 0;
1417             out(offset, segment, &data, OUT_ADDRESS, 2,
1418                 outfmt->segbase(1 + opx->segment),
1419                 opx->wrt);
1420             offset += 2;
1421             break;
1422
1423         case4(0140):
1424             data = opx->offset;
1425             warn_overflow_opd(opx, 2);
1426             if (is_sbyte16(opx)) {
1427                 bytes[0] = data;
1428                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1429                     NO_SEG);
1430                 offset++;
1431             } else {
1432                 out(offset, segment, &data, OUT_ADDRESS, 2,
1433                     opx->segment, opx->wrt);
1434                 offset += 2;
1435             }
1436             break;
1437
1438         case4(0144):
1439             EMIT_REX();
1440             bytes[0] = *codes++;
1441             if (is_sbyte16(opx))
1442                 bytes[0] |= 2;  /* s-bit */
1443             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1444             offset++;
1445             break;
1446
1447         case4(0150):
1448             data = opx->offset;
1449             warn_overflow_opd(opx, 4);
1450             if (is_sbyte32(opx)) {
1451                 bytes[0] = data;
1452                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1453                     NO_SEG);
1454                 offset++;
1455             } else {
1456                 out(offset, segment, &data, OUT_ADDRESS, 4,
1457                     opx->segment, opx->wrt);
1458                 offset += 4;
1459             }
1460             break;
1461
1462         case4(0154):
1463             EMIT_REX();
1464             bytes[0] = *codes++;
1465             if (is_sbyte32(opx))
1466                 bytes[0] |= 2;  /* s-bit */
1467             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1468             offset++;
1469             break;
1470
1471         case4(0160):
1472         case4(0164):
1473             break;
1474
1475         case 0171:
1476             bytes[0] =
1477                 (ins->drexdst << 4) |
1478                 (ins->rex & REX_OC ? 0x08 : 0) |
1479                 (ins->rex & (REX_R|REX_X|REX_B));
1480             ins->rex = 0;
1481             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1482             offset++;
1483             break;
1484
1485         case 0172:
1486             c = *codes++;
1487             opx = &ins->oprs[c >> 3];
1488             bytes[0] = nasm_regvals[opx->basereg] << 4;
1489             opx = &ins->oprs[c & 7];
1490             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1491                 errfunc(ERR_NONFATAL,
1492                         "non-absolute expression not permitted as argument %d",
1493                         c & 7);
1494             } else {
1495                 if (opx->offset & ~15) {
1496                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1497                             "four-bit argument exceeds bounds");
1498                 }
1499                 bytes[0] |= opx->offset & 15;
1500             }
1501             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1502             offset++;
1503             break;
1504
1505         case 0173:
1506             c = *codes++;
1507             opx = &ins->oprs[c >> 4];
1508             bytes[0] = nasm_regvals[opx->basereg] << 4;
1509             bytes[0] |= c & 15;
1510             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1511             offset++;
1512             break;
1513
1514         case 0174:
1515             c = *codes++;
1516             opx = &ins->oprs[c];
1517             bytes[0] = nasm_regvals[opx->basereg] << 4;
1518             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1519             offset++;
1520             break;
1521
1522         case4(0250):
1523             data = opx->offset;
1524             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1525                 (int32_t)data != (int64_t)data) {
1526                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1527                         "signed dword immediate exceeds bounds");
1528             }
1529             if (is_sbyte32(opx)) {
1530                 bytes[0] = data;
1531                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1532                     NO_SEG);
1533                 offset++;
1534             } else {
1535                 out(offset, segment, &data, OUT_ADDRESS, 4,
1536                     opx->segment, opx->wrt);
1537                 offset += 4;
1538             }
1539             break;
1540
1541         case4(0254):
1542             data = opx->offset;
1543             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1544                 (int32_t)data != (int64_t)data) {
1545                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1546                         "signed dword immediate exceeds bounds");
1547             }
1548             out(offset, segment, &data, OUT_ADDRESS, 4,
1549                 opx->segment, opx->wrt);
1550             offset += 4;
1551             break;
1552
1553         case4(0260):
1554         case 0270:
1555             codes += 2;
1556             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1557                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1558                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1559                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1560                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1561                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1562                 offset += 3;
1563             } else {
1564                 bytes[0] = 0xc5;
1565                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1566                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1567                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1568                 offset += 2;
1569             }
1570             break;
1571
1572         case4(0274):
1573         {
1574             uint64_t uv, um;
1575             int s;
1576
1577             if (ins->rex & REX_W)
1578                 s = 64;
1579             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1580                 s = 16;
1581             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1582                 s = 32;
1583             else
1584                 s = bits;
1585
1586             um = (uint64_t)2 << (s-1);
1587             uv = opx->offset;
1588
1589             if (uv > 127 && uv < (uint64_t)-128 &&
1590                 (uv < um-128 || uv > um-1)) {
1591                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1592                         "signed byte value exceeds bounds");
1593             }
1594             if (opx->segment != NO_SEG) {
1595                 data = uv;
1596                 out(offset, segment, &data, OUT_ADDRESS, 1,
1597                     opx->segment, opx->wrt);
1598             } else {
1599                 bytes[0] = uv;
1600                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1601                     NO_SEG);
1602             }
1603             offset += 1;
1604             break;
1605         }
1606
1607         case4(0300):
1608             break;
1609
1610         case 0310:
1611             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1612                 *bytes = 0x67;
1613                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1614                 offset += 1;
1615             } else
1616                 offset += 0;
1617             break;
1618
1619         case 0311:
1620             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1621                 *bytes = 0x67;
1622                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1623                 offset += 1;
1624             } else
1625                 offset += 0;
1626             break;
1627
1628         case 0312:
1629             break;
1630
1631         case 0313:
1632             ins->rex = 0;
1633             break;
1634
1635         case4(0314):
1636             break;
1637
1638         case 0320:
1639             if (bits != 16) {
1640                 *bytes = 0x66;
1641                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1642                 offset += 1;
1643             } else
1644                 offset += 0;
1645             break;
1646
1647         case 0321:
1648             if (bits == 16) {
1649                 *bytes = 0x66;
1650                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1651                 offset += 1;
1652             } else
1653                 offset += 0;
1654             break;
1655
1656         case 0322:
1657         case 0323:
1658             break;
1659
1660         case 0324:
1661             ins->rex |= REX_W;
1662             break;
1663
1664         case 0325:
1665             break;
1666
1667         case 0330:
1668             *bytes = *codes++ ^ condval[ins->condition];
1669             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1670             offset += 1;
1671             break;
1672
1673         case 0331:
1674             break;
1675
1676         case 0332:
1677         case 0333:
1678             *bytes = c - 0332 + 0xF2;
1679             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1680             offset += 1;
1681             break;
1682
1683         case 0334:
1684             if (ins->rex & REX_R) {
1685                 *bytes = 0xF0;
1686                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1687                 offset += 1;
1688             }
1689             ins->rex &= ~(REX_L|REX_R);
1690             break;
1691
1692         case 0335:
1693             break;
1694
1695         case 0336:
1696         case 0337:
1697             break;
1698
1699         case 0340:
1700             if (ins->oprs[0].segment != NO_SEG)
1701                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1702             else {
1703                 int64_t size = ins->oprs[0].offset;
1704                 if (size > 0)
1705                     out(offset, segment, NULL,
1706                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1707                 offset += size;
1708             }
1709             break;
1710
1711         case 0341:
1712             break;
1713
1714         case 0344:
1715         case 0345:
1716             bytes[0] = c & 1;
1717             switch (ins->oprs[0].basereg) {
1718             case R_CS:
1719                 bytes[0] += 0x0E;
1720                 break;
1721             case R_DS:
1722                 bytes[0] += 0x1E;
1723                 break;
1724             case R_ES:
1725                 bytes[0] += 0x06;
1726                 break;
1727             case R_SS:
1728                 bytes[0] += 0x16;
1729                 break;
1730             default:
1731                 errfunc(ERR_PANIC,
1732                         "bizarre 8086 segment register received");
1733             }
1734             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1735             offset++;
1736             break;
1737
1738         case 0346:
1739         case 0347:
1740             bytes[0] = c & 1;
1741             switch (ins->oprs[0].basereg) {
1742             case R_FS:
1743                 bytes[0] += 0xA0;
1744                 break;
1745             case R_GS:
1746                 bytes[0] += 0xA8;
1747                 break;
1748             default:
1749                 errfunc(ERR_PANIC,
1750                         "bizarre 386 segment register received");
1751             }
1752             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1753             offset++;
1754             break;
1755
1756         case 0360:
1757             break;
1758
1759         case 0361:
1760             bytes[0] = 0x66;
1761             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1762             offset += 1;
1763             break;
1764
1765         case 0362:
1766         case 0363:
1767             bytes[0] = c - 0362 + 0xf2;
1768             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1769             offset += 1;
1770             break;
1771
1772         case 0364:
1773         case 0365:
1774             break;
1775
1776         case 0366:
1777         case 0367:
1778             *bytes = c - 0366 + 0x66;
1779             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1780             offset += 1;
1781             break;
1782
1783         case 0370:
1784         case 0371:
1785         case 0372:
1786             break;
1787
1788         case 0373:
1789             *bytes = bits == 16 ? 3 : 5;
1790             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1791             offset += 1;
1792             break;
1793
1794         case4(0100):
1795         case4(0110):
1796         case4(0120):
1797         case4(0130):
1798         case4(0200):
1799         case4(0204):
1800         case4(0210):
1801         case4(0214):
1802         case4(0220):
1803         case4(0224):
1804         case4(0230):
1805         case4(0234):
1806             {
1807                 ea ea_data;
1808                 int rfield;
1809                 opflags_t rflags;
1810                 uint8_t *p;
1811                 int32_t s;
1812                 enum out_type type;
1813                 struct operand *opy = &ins->oprs[op2];
1814
1815                 if (c <= 0177) {
1816                     /* pick rfield from operand b (opx) */
1817                     rflags = regflag(opx);
1818                     rfield = nasm_regvals[opx->basereg];
1819                 } else {
1820                     /* rfield is constant */
1821                     rflags = 0;
1822                     rfield = c & 7;
1823                 }
1824
1825                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1826                                 rfield, rflags)) {
1827                     errfunc(ERR_NONFATAL, "invalid effective address");
1828                 }
1829
1830
1831                 p = bytes;
1832                 *p++ = ea_data.modrm;
1833                 if (ea_data.sib_present)
1834                     *p++ = ea_data.sib;
1835
1836                 /* DREX suffixes come between the SIB and the displacement */
1837                 if (ins->rex & REX_D) {
1838                     *p++ = (ins->drexdst << 4) |
1839                            (ins->rex & REX_OC ? 0x08 : 0) |
1840                            (ins->rex & (REX_R|REX_X|REX_B));
1841                     ins->rex = 0;
1842                 }
1843
1844                 s = p - bytes;
1845                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1846
1847                 /*
1848                  * Make sure the address gets the right offset in case
1849                  * the line breaks in the .lst file (BR 1197827)
1850                  */
1851                 offset += s;
1852                 s = 0;
1853
1854                 switch (ea_data.bytes) {
1855                 case 0:
1856                     break;
1857                 case 1:
1858                 case 2:
1859                 case 4:
1860                 case 8:
1861                     data = opy->offset;
1862                     s += ea_data.bytes;
1863                     if (ea_data.rip) {
1864                         if (opy->segment == segment) {
1865                             data -= insn_end;
1866                             if (overflow_signed(data, ea_data.bytes))
1867                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1868                             out(offset, segment, &data, OUT_ADDRESS,
1869                                 ea_data.bytes, NO_SEG, NO_SEG);
1870                         } else {
1871                             /* overflow check in output/linker? */
1872                             out(offset, segment, &data,        OUT_REL4ADR,
1873                                 insn_end - offset, opy->segment, opy->wrt);
1874                         }
1875                     } else {
1876                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1877                             signed_bits(opy->offset, ins->addr_size) !=
1878                             signed_bits(opy->offset, ea_data.bytes * 8))
1879                             warn_overflow(ERR_PASS2, ea_data.bytes);
1880
1881                         type = OUT_ADDRESS;
1882                         out(offset, segment, &data, OUT_ADDRESS,
1883                             ea_data.bytes, opy->segment, opy->wrt);
1884                     }
1885                     break;
1886                 default:
1887                     /* Impossible! */
1888                     errfunc(ERR_PANIC,
1889                             "Invalid amount of bytes (%d) for offset?!",
1890                             ea_data.bytes);
1891                     break;
1892                 }
1893                 offset += s;
1894             }
1895             break;
1896
1897         default:
1898             errfunc(ERR_PANIC, "internal instruction table corrupt"
1899                     ": instruction code \\%o (0x%02X) given", c, c);
1900             break;
1901         }
1902     }
1903 }
1904
1905 static opflags_t regflag(const operand * o)
1906 {
1907     if (!is_register(o->basereg))
1908         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1909     return nasm_reg_flags[o->basereg];
1910 }
1911
1912 static int32_t regval(const operand * o)
1913 {
1914     if (!is_register(o->basereg))
1915         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1916     return nasm_regvals[o->basereg];
1917 }
1918
1919 static int op_rexflags(const operand * o, int mask)
1920 {
1921     opflags_t flags;
1922     int val;
1923
1924     if (!is_register(o->basereg))
1925         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1926
1927     flags = nasm_reg_flags[o->basereg];
1928     val = nasm_regvals[o->basereg];
1929
1930     return rexflags(val, flags, mask);
1931 }
1932
1933 static int rexflags(int val, opflags_t flags, int mask)
1934 {
1935     int rex = 0;
1936
1937     if (val >= 8)
1938         rex |= REX_B|REX_X|REX_R;
1939     if (flags & BITS64)
1940         rex |= REX_W;
1941     if (!(REG_HIGH & ~flags))                   /* AH, CH, DH, BH */
1942         rex |= REX_H;
1943     else if (!(REG8 & ~flags) && val >= 4)      /* SPL, BPL, SIL, DIL */
1944         rex |= REX_P;
1945
1946     return rex & mask;
1947 }
1948
1949 static enum match_result find_match(const struct itemplate **tempp,
1950                                     insn *instruction,
1951                                     int32_t segment, int64_t offset, int bits)
1952 {
1953     const struct itemplate *temp;
1954     enum match_result m, merr;
1955     opflags_t xsizeflags[MAX_OPERANDS];
1956     bool opsizemissing = false;
1957     int i;
1958
1959     for (i = 0; i < instruction->operands; i++)
1960         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1961
1962     merr = MERR_INVALOP;
1963
1964     for (temp = nasm_instructions[instruction->opcode];
1965          temp->opcode != I_none; temp++) {
1966         m = matches(temp, instruction, bits);
1967         if (m == MOK_JUMP) {
1968             if (jmp_match(segment, offset, bits, instruction, temp->code))
1969                 m = MOK_GOOD;
1970             else
1971                 m = MERR_INVALOP;
1972         } else if (m == MERR_OPSIZEMISSING &&
1973                    (temp->flags & IF_SMASK) != IF_SX) {
1974             /*
1975              * Missing operand size and a candidate for fuzzy matching...
1976              */
1977             for (i = 0; i < temp->operands; i++) {
1978                 if ((temp->opd[i] & SAME_AS) == 0)
1979                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1980             }
1981             opsizemissing = true;
1982         }
1983         if (m > merr)
1984             merr = m;
1985         if (merr == MOK_GOOD)
1986             goto done;
1987     }
1988
1989     /* No match, but see if we can get a fuzzy operand size match... */
1990     if (!opsizemissing)
1991         goto done;
1992
1993     for (i = 0; i < instruction->operands; i++) {
1994         /*
1995          * We ignore extrinsic operand sizes on registers, so we should
1996          * never try to fuzzy-match on them.  This also resolves the case
1997          * when we have e.g. "xmmrm128" in two different positions.
1998          */
1999         if (is_class(REGISTER, instruction->oprs[i].type))
2000             continue;
2001
2002         /* This tests if xsizeflags[i] has more than one bit set */
2003         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2004             goto done;                /* No luck */
2005
2006         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2007     }
2008
2009     /* Try matching again... */
2010     for (temp = nasm_instructions[instruction->opcode];
2011          temp->opcode != I_none; temp++) {
2012         m = matches(temp, instruction, bits);
2013         if (m == MOK_JUMP) {
2014             if (jmp_match(segment, offset, bits, instruction, temp->code))
2015                 m = MOK_GOOD;
2016             else
2017                 m = MERR_INVALOP;
2018         }
2019         if (m > merr)
2020             merr = m;
2021         if (merr == MOK_GOOD)
2022             goto done;
2023     }
2024
2025 done:
2026     *tempp = temp;
2027     return merr;
2028 }
2029
2030 static enum match_result matches(const struct itemplate *itemp,
2031                                  insn *instruction, int bits)
2032 {
2033     int i, size[MAX_OPERANDS], asize, oprs;
2034     bool opsizemissing = false;
2035
2036     /*
2037      * Check the opcode
2038      */
2039     if (itemp->opcode != instruction->opcode)
2040         return MERR_INVALOP;
2041
2042     /*
2043      * Count the operands
2044      */
2045     if (itemp->operands != instruction->operands)
2046         return MERR_INVALOP;
2047
2048     /*
2049      * Is it legal?
2050      */
2051     if (!(optimizing > 0) && (itemp->flags & IF_OPT))
2052         return MERR_INVALOP;
2053
2054     /*
2055      * Check that no spurious colons or TOs are present
2056      */
2057     for (i = 0; i < itemp->operands; i++)
2058         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2059             return MERR_INVALOP;
2060
2061     /*
2062      * Process size flags
2063      */
2064     switch (itemp->flags & IF_SMASK) {
2065     case IF_SB:
2066         asize = BITS8;
2067         break;
2068     case IF_SW:
2069         asize = BITS16;
2070         break;
2071     case IF_SD:
2072         asize = BITS32;
2073         break;
2074     case IF_SQ:
2075         asize = BITS64;
2076         break;
2077     case IF_SO:
2078         asize = BITS128;
2079         break;
2080     case IF_SY:
2081         asize = BITS256;
2082         break;
2083     case IF_SZ:
2084         switch (bits) {
2085         case 16:
2086             asize = BITS16;
2087             break;
2088         case 32:
2089             asize = BITS32;
2090             break;
2091         case 64:
2092             asize = BITS64;
2093             break;
2094         default:
2095             asize = 0;
2096             break;
2097         }
2098         break;
2099     default:
2100         asize = 0;
2101         break;
2102     }
2103
2104     if (itemp->flags & IF_ARMASK) {
2105         /* S- flags only apply to a specific operand */
2106         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2107         memset(size, 0, sizeof size);
2108         size[i] = asize;
2109     } else {
2110         /* S- flags apply to all operands */
2111         for (i = 0; i < MAX_OPERANDS; i++)
2112             size[i] = asize;
2113     }
2114
2115     /*
2116      * Check that the operand flags all match up,
2117      * it's a bit tricky so lets be verbose:
2118      *
2119      * 1) Find out the size of operand. If instruction
2120      *    doesn't have one specified -- we're trying to
2121      *    guess it either from template (IF_S* flag) or
2122      *    from code bits.
2123      *
2124      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2125      *    (ie the same operand as was specified somewhere in template, and
2126      *    this referred operand index is being achieved via ~SAME_AS)
2127      *    we are to be sure that both registers (in template and instruction)
2128      *    do exactly match.
2129      *
2130      * 3) If template operand do not match the instruction OR
2131      *    template has an operand size specified AND this size differ
2132      *    from which instruction has (perhaps we got it from code bits)
2133      *    we are:
2134      *      a)  Check that only size of instruction and operand is differ
2135      *          other characteristics do match
2136      *      b)  Perhaps it's a register specified in instruction so
2137      *          for such a case we just mark that operand as "size
2138      *          missing" and this will turn on fuzzy operand size
2139      *          logic facility (handled by a caller)
2140      */
2141     for (i = 0; i < itemp->operands; i++) {
2142         opflags_t type = instruction->oprs[i].type;
2143         if (!(type & SIZE_MASK))
2144             type |= size[i];
2145
2146         if (itemp->opd[i] & SAME_AS) {
2147             int j = itemp->opd[i] & ~SAME_AS;
2148             if (type != instruction->oprs[j].type ||
2149                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2150                 return MERR_INVALOP;
2151         } else if (itemp->opd[i] & ~type ||
2152             ((itemp->opd[i] & SIZE_MASK) &&
2153              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2154             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2155                 return MERR_INVALOP;
2156             } else if (!is_class(REGISTER, type)) {
2157                 /*
2158                  * Note: we don't honor extrinsic operand sizes for registers,
2159                  * so "missing operand size" for a register should be
2160                  * considered a wildcard match rather than an error.
2161                  */
2162                 opsizemissing = true;
2163             }
2164         }
2165     }
2166
2167     if (opsizemissing)
2168         return MERR_OPSIZEMISSING;
2169
2170     /*
2171      * Check operand sizes
2172      */
2173     if (itemp->flags & (IF_SM | IF_SM2)) {
2174         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2175         for (i = 0; i < oprs; i++) {
2176             asize = itemp->opd[i] & SIZE_MASK;
2177             if (asize) {
2178                 for (i = 0; i < oprs; i++)
2179                     size[i] = asize;
2180                 break;
2181             }
2182         }
2183     } else {
2184         oprs = itemp->operands;
2185     }
2186
2187     for (i = 0; i < itemp->operands; i++) {
2188         if (!(itemp->opd[i] & SIZE_MASK) &&
2189             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2190             return MERR_OPSIZEMISMATCH;
2191     }
2192
2193     /*
2194      * Check template is okay at the set cpu level
2195      */
2196     if (((itemp->flags & IF_PLEVEL) > cpu))
2197         return MERR_BADCPU;
2198
2199     /*
2200      * Verify the appropriate long mode flag.
2201      */
2202     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2203         return MERR_BADMODE;
2204
2205     /*
2206      * Check if special handling needed for Jumps
2207      */
2208     if ((itemp->code[0] & 0374) == 0370)
2209         return MOK_JUMP;
2210
2211     return MOK_GOOD;
2212 }
2213
/*
 * process_ea: work out the ModRM byte, the optional SIB byte and the
 * displacement size needed to encode `input` as an effective address.
 *
 *   input    - operand to encode; its type may be modified (IP_REL is
 *              cleared and MEMORY set) when an absolute address was
 *              marked RIP-relative in 64-bit mode
 *   output   - receives modrm, sib, sib_present, bytes, rip and size;
 *              REX flags are OR-ed into output->rex, which is never
 *              cleared here
 *   bits     - compared against 64 to select the 64-bit encodings
 *   addrbits - address size (16, 32 or 64) the encoding is checked against
 *   rfield   - value placed in bits 3..5 of the ModRM byte
 *   rflags   - register flags for rfield, used only for its REX flags
 *
 * Returns `output`, or NULL when the operand cannot be encoded; in the
 * NULL case output->rip and output->rex may already have been written.
 */
2214 static ea *process_ea(operand * input, ea * output, int bits,
2215                       int addrbits, int rfield, opflags_t rflags)
2216 {
         /*
          * Set when the operand carries OPFLAG_UNKNOWN; the mod 0 and
          * mod 1 forms below are only chosen when this is false.
          */
2217     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2218
2219     output->rip = false;
2220
2221     /* REX flags for the rfield operand */
2222     output->rex |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
2223
2224     if (is_class(REGISTER, input->type)) {  /* register direct */
2225         int i;
2226         opflags_t f;
2227
2228         if (!is_register(input->basereg))
2229             return NULL;
2230         f = regflag(input);
2231         i = nasm_regvals[input->basereg];
2232
2233         if (REG_EA & ~f)
2234             return NULL;        /* Invalid EA register */
2235
2236         output->rex |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
2237
2238         output->sib_present = false;    /* no SIB necessary */
2239         output->bytes = 0;              /* no offset necessary either */
             /* mod = 3 (0xC0), reg = rfield, r/m = low 3 bits of the register */
2240         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2241     } else {                    /* it's a memory reference */
2242         if (input->basereg == -1 &&
2243             (input->indexreg == -1 || input->scale == 0)) {
2244             /* it's a pure offset */
2245
2246             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2247                 input->segment == NO_SEG) {
2248                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2249                 input->type &= ~IP_REL;
2250                 input->type |= MEMORY;
2251             }
2252
2253             if (input->eaflags & EAF_BYTEOFFS ||
2254                 (input->eaflags & EAF_WORDOFFS &&
2255                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2256                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2257             }
2258
2259             if (bits == 64 && (~input->type & IP_REL)) {
                     /*
                      * 64-bit mode without IP_REL: mod = 0, r/m = 4 plus a
                      * SIB byte with index = 4, base = 5 and a 4-byte offset.
                      */
2260                 int scale, index, base;
2261                 output->sib_present = true;
2262                 scale = 0;
2263                 index = 4;
2264                 base = 5;
2265                 output->sib = (scale << 6) | (index << 3) | base;
2266                 output->bytes = 4;
2267                 output->modrm = 4 | ((rfield & 7) << 3);
2268                 output->rip = false;
2269             } else {
                     /*
                      * mod = 0 with r/m = 5 (4-byte offset) or, for 16-bit
                      * addressing, r/m = 6 (2-byte offset); flagged as
                      * RIP-relative whenever bits == 64.
                      */
2270                 output->sib_present = false;
2271                 output->bytes = (addrbits != 16 ? 4 : 2);
2272                 output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2273                 output->rip = bits == 64;
2274             }
2275         } else {                /* it's an indirection */
2276             int i = input->indexreg, b = input->basereg, s = input->scale;
2277             int32_t seg = input->segment;
2278             int hb = input->hintbase, ht = input->hinttype;
2279             int t, it, bt;              /* register numbers */
2280             opflags_t x, ix, bx;        /* register flags */
2281
2282             if (s == 0)
2283                 i = -1;         /* make this easy, at least */
2284
                 /* it/ix: index register number and flags, -1/0 if absent */
2285             if (is_register(i)) {
2286                 it = nasm_regvals[i];
2287                 ix = nasm_reg_flags[i];
2288             } else {
2289                 it = -1;
2290                 ix = 0;
2291             }
2292
                 /* bt/bx: base register number and flags, -1/0 if absent */
2293             if (is_register(b)) {
2294                 bt = nasm_regvals[b];
2295                 bx = nasm_reg_flags[b];
2296             } else {
2297                 bt = -1;
2298                 bx = 0;
2299             }
2300
2301             /* check for a 32/64-bit memory reference... */
2302             if ((ix|bx) & (BITS32|BITS64)) {
2303                 /*
2304                  * it must be a 32/64-bit memory reference. Firstly we have
2305                  * to check that all registers involved are type E/Rxx.
2306                  */
                     /*
                      * sok: address sizes still allowed by the registers seen
                      * so far; o: the operand offset held as an int32_t.
                      */
2307                 int32_t sok = BITS32 | BITS64, o = input->offset;
2308
2309                 if (it != -1) {
2310                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2311                         sok &= ix;
2312                     else
2313                         return NULL;
2314                 }
2315
2316                 if (bt != -1) {
2317                     if (REG_GPR & ~bx)
2318                         return NULL; /* Invalid register */
2319                     if (~sok & bx & SIZE_MASK)
2320                         return NULL; /* Invalid size */
2321                     sok &= bx;
2322                 }
2323
2324                 /*
2325                  * While we're here, ensure the user didn't specify
2326                  * WORD or QWORD
2327                  */
2328                 if (input->disp_size == 16 || input->disp_size == 64)
2329                     return NULL;
2330
                     /* the register sizes must agree with the address size */
2331                 if (addrbits == 16 ||
2332                     (addrbits == 32 && !(sok & BITS32)) ||
2333                     (addrbits == 64 && !(sok & BITS64)))
2334                     return NULL;
2335
2336                 /* now reorganize base/index */
2337                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2338                     ((hb == b && ht == EAH_NOTBASE) ||
2339                      (hb == i && ht == EAH_MAKEBASE))) {
2340                     /* swap if hints say so */
2341                     t = bt, bt = it, it = t;
2342                     x = bx, bx = ix, ix = x;
2343                 }
2344                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2345                     bt = -1, bx = 0, s++;
2346                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2347                     /* make single reg base, unless hint */
2348                     bt = it, bx = ix, it = -1, ix = 0;
2349                 }
2350                 if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
2351                       s == 3 || s == 5 || s == 9) && bt == -1)
2352                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2353                 if (it == -1 && (bt & 7) != REG_NUM_ESP &&
2354                     (input->eaflags & EAF_TIMESTWO))
2355                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2356                 /* the statement above converts [NOSPLIT EAX] to sib format with 0x0 displacement */
2357                 if (s == 1 && it == REG_NUM_ESP) {
2358                     /* swap ESP into base if scale is 1 */
2359                     t = it, it = bt, bt = t;
2360                     x = ix, ix = bx, bx = x;
2361                 }
2362                 if (it == REG_NUM_ESP ||
2363                     (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2364                     return NULL;        /* wrong, for various reasons */
2365
2366                 output->rex |= rexflags(it, ix, REX_X);
2367                 output->rex |= rexflags(bt, bx, REX_B);
2368
2369                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2370                     /* no SIB needed */
2371                     int mod, rm;
2372
2373                     if (bt == -1) {
2374                         rm = 5;
2375                         mod = 0;
2376                     } else {
2377                         rm = (bt & 7);
                             /*
                              * mod 0: no offset; mod 1: offset fits in a
                              * signed byte (or EAF_BYTEOFFS is set);
                              * mod 2: everything else.
                              */
2378                         if (rm != REG_NUM_EBP && o == 0 &&
2379                             seg == NO_SEG && !forw_ref &&
2380                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2381                             mod = 0;
2382                         else if (input->eaflags & EAF_BYTEOFFS ||
2383                                  (o >= -128 && o <= 127 &&
2384                                   seg == NO_SEG && !forw_ref &&
2385                                   !(input->eaflags & EAF_WORDOFFS)))
2386                             mod = 1;
2387                         else
2388                             mod = 2;
2389                     }
2390
2391                     output->sib_present = false;
                         /* offset bytes: 4 with no base or mod 2, else mod (0 or 1) */
2392                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2393                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2394                 } else {
2395                     /* we need a SIB */
2396                     int mod, scale, index, base;
2397
2398                     if (it == -1)
2399                         index = 4, s = 1;
2400                     else
2401                         index = (it & 7);
2402
                         /* translate the scale factor into the 2-bit SIB field */
2403                     switch (s) {
2404                     case 1:
2405                         scale = 0;
2406                         break;
2407                     case 2:
2408                         scale = 1;
2409                         break;
2410                     case 4:
2411                         scale = 2;
2412                         break;
2413                     case 8:
2414                         scale = 3;
2415                         break;
2416                     default:   /* then what the smeg is it? */
2417                         return NULL;    /* panic */
2418                     }
2419
2420                     if (bt == -1) {
2421                         base = 5;
2422                         mod = 0;
2423                     } else {
2424                         base = (bt & 7);
                             /* same mod selection as in the no-SIB case above */
2425                         if (base != REG_NUM_EBP && o == 0 &&
2426                             seg == NO_SEG && !forw_ref &&
2427                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2428                             mod = 0;
2429                         else if (input->eaflags & EAF_BYTEOFFS ||
2430                                  (o >= -128 && o <= 127 &&
2431                                   seg == NO_SEG && !forw_ref &&
2432                                   !(input->eaflags & EAF_WORDOFFS)))
2433                             mod = 1;
2434                         else
2435                             mod = 2;
2436                     }
2437
2438                     output->sib_present = true;
2439                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
                         /* r/m = 4 announces the SIB byte that follows */
2440                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2441                     output->sib = (scale << 6) | (index << 3) | base;
2442                 }
2443             } else {            /* it's 16-bit */
2444                 int mod, rm;
                     /* the offset is narrowed to 16 bits for the range tests below */
2445                 int16_t o = input->offset;
2446
2447                 /* check for 64-bit long mode */
2448                 if (addrbits == 64)
2449                     return NULL;
2450
2451                 /* check all registers are BX, BP, SI or DI */
2452                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
2453                     (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
2454                     return NULL;
2455
2456                 /* ensure the user didn't specify DWORD/QWORD */
2457                 if (input->disp_size == 32 || input->disp_size == 64)
2458                     return NULL;
2459
2460                 if (s != 1 && i != -1)
2461                     return NULL;        /* no can do, in 16-bit EA */
2462                 if (b == -1 && i != -1) {
2463                     int tmp = b;
2464                     b = i;
2465                     i = tmp;
2466                 }               /* swap */
2467                 if ((b == R_SI || b == R_DI) && i != -1) {
2468                     int tmp = b;
2469                     b = i;
2470                     i = tmp;
2471                 }
2472                 /* have BX/BP as base, SI/DI index */
2473                 if (b == i)
2474                     return NULL;        /* shouldn't ever happen, in theory */
2475                 if (i != -1 && b != -1 &&
2476                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2477                     return NULL;        /* invalid combinations */
2478                 if (b == -1)            /* pure offset: handled above */
2479                     return NULL;        /* so if it gets to here, panic! */
2480
                     /*
                      * Pick the r/m value: 0..3 for the index+base pairs,
                      * 4..7 for a lone base register.  The pair is folded
                      * into one switch key as index * 256 + base.
                      */
2481                 rm = -1;
2482                 if (i != -1)
2483                     switch (i * 256 + b) {
2484                     case R_SI * 256 + R_BX:
2485                         rm = 0;
2486                         break;
2487                     case R_DI * 256 + R_BX:
2488                         rm = 1;
2489                         break;
2490                     case R_SI * 256 + R_BP:
2491                         rm = 2;
2492                         break;
2493                     case R_DI * 256 + R_BP:
2494                         rm = 3;
2495                         break;
2496                 } else
2497                     switch (b) {
2498                     case R_SI:
2499                         rm = 4;
2500                         break;
2501                     case R_DI:
2502                         rm = 5;
2503                         break;
2504                     case R_BP:
2505                         rm = 6;
2506                         break;
2507                     case R_BX:
2508                         rm = 7;
2509                         break;
2510                     }
2511                 if (rm == -1)           /* can't happen, in theory */
2512                     return NULL;        /* so panic if it does */
2513
                     /* rm 6 (BP alone) is excluded from the offset-free mod 0 form */
2514                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2515                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2516                     mod = 0;
2517                 else if (input->eaflags & EAF_BYTEOFFS ||
2518                          (o >= -128 && o <= 127 && seg == NO_SEG &&
2519                           !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
2520                     mod = 1;
2521                 else
2522                     mod = 2;
2523
2524                 output->sib_present = false;    /* no SIB - it's 16-bit */
2525                 output->bytes = mod;            /* bytes of offset needed */
2526                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2527             }
2528         }
2529     }
2530
         /* total EA length: ModRM byte, optional SIB byte, offset bytes */
2531     output->size = 1 + output->sib_present + output->bytes;
2532     return output;
2533 }
2534
2535 static void add_asp(insn *ins, int addrbits)
2536 {
2537     int j, valid;
2538     int defdisp;
2539
2540     valid = (addrbits == 64) ? 64|32 : 32|16;
2541
2542     switch (ins->prefixes[PPS_ASIZE]) {
2543     case P_A16:
2544         valid &= 16;
2545         break;
2546     case P_A32:
2547         valid &= 32;
2548         break;
2549     case P_A64:
2550         valid &= 64;
2551         break;
2552     case P_ASP:
2553         valid &= (addrbits == 32) ? 16 : 32;
2554         break;
2555     default:
2556         break;
2557     }
2558
2559     for (j = 0; j < ins->operands; j++) {
2560         if (is_class(MEMORY, ins->oprs[j].type)) {
2561             opflags_t i, b;
2562
2563             /* Verify as Register */
2564             if (!is_register(ins->oprs[j].indexreg))
2565                 i = 0;
2566             else
2567                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2568
2569             /* Verify as Register */
2570             if (!is_register(ins->oprs[j].basereg))
2571                 b = 0;
2572             else
2573                 b = nasm_reg_flags[ins->oprs[j].basereg];
2574
2575             if (ins->oprs[j].scale == 0)
2576                 i = 0;
2577
2578             if (!i && !b) {
2579                 int ds = ins->oprs[j].disp_size;
2580                 if ((addrbits != 64 && ds > 8) ||
2581                     (addrbits == 64 && ds == 16))
2582                     valid &= ds;
2583             } else {
2584                 if (!(REG16 & ~b))
2585                     valid &= 16;
2586                 if (!(REG32 & ~b))
2587                     valid &= 32;
2588                 if (!(REG64 & ~b))
2589                     valid &= 64;
2590
2591                 if (!(REG16 & ~i))
2592                     valid &= 16;
2593                 if (!(REG32 & ~i))
2594                     valid &= 32;
2595                 if (!(REG64 & ~i))
2596                     valid &= 64;
2597             }
2598         }
2599     }
2600
2601     if (valid & addrbits) {
2602         ins->addr_size = addrbits;
2603     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2604         /* Add an address size prefix */
2605         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2606         ins->prefixes[PPS_ASIZE] = pref;
2607         ins->addr_size = (addrbits == 32) ? 16 : 32;
2608     } else {
2609         /* Impossible... */
2610         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2611         ins->addr_size = addrbits; /* Error recovery */
2612     }
2613
2614     defdisp = ins->addr_size == 16 ? 16 : 32;
2615
2616     for (j = 0; j < ins->operands; j++) {
2617         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2618             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
2619             /*
2620              * mem_offs sizes must match the address size; if not,
2621              * strip the MEM_OFFS bit and match only EA instructions
2622              */
2623             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2624         }
2625     }
2626 }