assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 0ww lpp
  96  *                 [w0] ww = 0 for W = 0
  97  *                 [w1] ww = 1 for W = 1
  98  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  99  *                 [ww] ww = 3 for W used as REX.W
 100  *
 101  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 102  *
 103  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 104  *                 which is to be extended to the operand size.
 105  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 106  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 107  * \312          - (disassembler only) invalid with non-default address size.
 108  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 109  * \314          - (disassembler only) invalid with REX.B
 110  * \315          - (disassembler only) invalid with REX.X
 111  * \316          - (disassembler only) invalid with REX.R
 112  * \317          - (disassembler only) invalid with REX.W
 113  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 114  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 115  * \322          - indicates that this instruction is only valid when the
 116  *                 operand size is the default (instruction to disassembler,
 117  *                 generates no code in the assembler)
 118  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 119  * \324          - indicates 64-bit operand size requiring REX prefix.
 120  * \325          - instruction which always uses spl/bpl/sil/dil
 121  * \330          - a literal byte follows in the code stream, to be added
 122  *                 to the condition code value of the instruction.
 123  * \331          - instruction not valid with REP prefix.  Hint for
 124  *                 disassembler only; for SSE instructions.
 125  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 126  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 127  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 128  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  129  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  130  * \337          - force a REPNE prefix (0xF2) even if not specified.
 131  *                 \336-\337 are still listed as prefixes in the disassembler.
 132  * \340          - reserve <operand 0> bytes of uninitialized storage.
 133  *                 Operand 0 had better be a segmentless constant.
 134  * \341          - this instruction needs a WAIT "prefix"
 135  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 136  *                 (POP is never used for CS) depending on operand 0
 137  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 138  *                 on operand 0
 139  * \360          - no SSE prefix (== \364\331)
 140  * \361          - 66 SSE prefix (== \366\331)
 141  * \362          - F2 SSE prefix (== \364\332)
 142  * \363          - F3 SSE prefix (== \364\333)
 143  * \364          - operand-size prefix (0x66) not permitted
 144  * \365          - address-size prefix (0x67) not permitted
 145  * \366          - operand-size prefix (0x66) used as opcode extension
 146  * \367          - address-size prefix (0x67) used as opcode extension
 147  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 148  *                 370 is used for Jcc, 371 is used for JMP.
 149  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 150  *                 used for conditional jump over longer jump
 151  */
 152
 153 #include "compiler.h"
 154
 155 #include <stdio.h>
 156 #include <string.h>
 157 #include <inttypes.h>
 158
 159 #include "nasm.h"
 160 #include "nasmlib.h"
 161 #include "assemble.h"
 162 #include "insns.h"
 163 #include "tables.h"
 164
  165 enum match_result {
  166     /*
  167      * Matching errors.  These should be sorted so that more specific
  168      * errors come later in the sequence.
  169      */
  170     MERR_INVALOP,               /* generic failure: invalid opcode/operand combination */
  171     MERR_OPSIZEMISSING,         /* operation size not specified */
  172     MERR_OPSIZEMISMATCH,        /* mismatch in operand sizes */
  173     MERR_BADCPU,                /* no instruction for this cpu level */
  174     MERR_BADMODE,               /* instruction not supported in the current bit mode */
  175     /*
  176      * Matching success; the conditional ones first
  177      */
  178     MOK_JUMP,                   /* Matching OK but needs jmp_match() */
  179     MOK_GOOD                    /* Matching unconditionally OK */
  180 };
 181
  182 typedef struct {                     /* effective address (EA) encoding */
  183     int sib_present;                 /* is a SIB byte necessary? */
  184     int bytes;                       /* # of bytes of offset needed */
  185     int size;                        /* lazy - this is sib+bytes+1 */
  186     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
  187 } ea;                                /* passed to and returned by process_ea() */
 188
  189 static uint32_t cpu;            /* cpu level received from nasm.c */
  190 static efunc errfunc;           /* error callback; set by assemble() and insn_size() */
  191 static struct ofmt *outfmt;     /* output format driver; set by assemble() */
  192 static ListGen *list;           /* listing generator; set by assemble() */
  193
  194 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
  195 static void gencode(int32_t segment, int64_t offset, int bits,
  196                     insn * ins, const struct itemplate *temp,
  197                     int64_t insn_end);
  198 static enum match_result find_match(const struct itemplate **tempp,
  199                                     insn *instruction,
  200                                     int32_t segment, int64_t offset, int bits);
  201 static enum match_result matches(const struct itemplate *, insn *, int bits);
  202 static opflags_t regflag(const operand *);
  203 static int32_t regval(const operand *);
  204 static int rexflags(int, opflags_t, int);
  205 static int op_rexflags(const operand *, int);
  206 static ea *process_ea(operand *, ea *, int, int, int, opflags_t);
  207 static void add_asp(insn *, int);
 208
 209 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 210 {
 211     return ins->prefixes[pos] == prefix;
 212 }
 213
 214 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 215 {
 216     if (ins->prefixes[pos])
 217         errfunc(ERR_NONFATAL, "invalid %s prefix",
 218                 prefix_name(ins->prefixes[pos]));
 219 }
 220
 221 static const char *size_name(int size)
 222 {
 223     switch (size) {
 224     case 1:
 225         return "byte";
 226     case 2:
 227         return "word";
 228     case 4:
 229         return "dword";
 230     case 8:
 231         return "qword";
 232     case 10:
 233         return "tword";
 234     case 16:
 235         return "oword";
 236     case 32:
 237         return "yword";
 238     default:
 239         return "???";
 240     }
 241 }
 242
 243 static void warn_overflow(int pass, int size)
 244 {
 245     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 246             "%s data exceeds bounds", size_name(size));
 247 }
 248
 249 static void warn_overflow_const(int64_t data, int size)
 250 {
 251     if (overflow_general(data, size))
 252         warn_overflow(ERR_PASS1, size);
 253 }
 254
 255 static void warn_overflow_opd(const struct operand *o, int size)
 256 {
 257     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 258         if (overflow_general(o->offset, size))
 259             warn_overflow(ERR_PASS2, size);
 260     }
 261 }
 262
 263 /*
 264  * This routine wrappers the real output format's output routine,
 265  * in order to pass a copy of the data off to the listing file
 266  * generator at the same time.
 267  */
 268 static void out(int64_t offset, int32_t segto, const void *data,
 269                 enum out_type type, uint64_t size,
 270                 int32_t segment, int32_t wrt)
 271 {
 272     static int32_t lineno = 0;     /* static!!! */
 273     static char *lnfname = NULL;
 274     uint8_t p[8];
 275
 276     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 277         /*
 278          * This is a non-relocated address, and we're going to
 279          * convert it into RAWDATA format.
 280          */
 281         uint8_t *q = p;
 282
 283         if (size > 8) {
 284             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 285             return;
 286         }
 287
 288         WRITEADDR(q, *(int64_t *)data, size);
 289         data = p;
 290         type = OUT_RAWDATA;
 291     }
 292
 293     list->output(offset, data, type, size);
 294
 295     /*
 296      * this call to src_get determines when we call the
 297      * debug-format-specific "linenum" function
 298      * it updates lineno and lnfname to the current values
 299      * returning 0 if "same as last time", -2 if lnfname
 300      * changed, and the amount by which lineno changed,
 301      * if it did. thus, these variables must be static
 302      */
 303
 304     if (src_get(&lineno, &lnfname)) {
 305         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 306     }
 307
 308     outfmt->output(segto, data, type, size, segment, wrt);
 309 }
 310
 311 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 312                      insn * ins, const uint8_t *code)
 313 {
 314     int64_t isize;
 315     uint8_t c = code[0];
 316
 317     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 318         return false;
 319     if (!optimizing)
 320         return false;
 321     if (optimizing < 0 && c == 0371)
 322         return false;
 323
 324     isize = calcsize(segment, offset, bits, ins, code);
 325
 326     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 327         /* Be optimistic in pass 1 */
 328         return true;
 329
 330     if (ins->oprs[0].segment != segment)
 331         return false;
 332
 333     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 334     return (isize >= -128 && isize <= 127); /* is it byte size? */
 335 }
 336
 337 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 338                  insn * instruction, struct ofmt *output, efunc error,
 339                  ListGen * listgen)
 340 {
 341     const struct itemplate *temp;
 342     int j;
 343     enum match_result m;
 344     int64_t insn_end;
 345     int32_t itimes;
 346     int64_t start = offset;
 347     int64_t wsize;              /* size for DB etc. */
 348
 349     errfunc = error;            /* to pass to other functions */
 350     cpu = cp;
 351     outfmt = output;            /* likewise */
 352     list = listgen;             /* and again */
 353
 354     wsize = idata_bytes(instruction->opcode);
 355     if (wsize == -1)
 356         return 0;
 357
 358     if (wsize) {
 359         extop *e;
 360         int32_t t = instruction->times;
 361         if (t < 0)
 362             errfunc(ERR_PANIC,
 363                     "instruction->times < 0 (%ld) in assemble()", t);
 364
 365         while (t--) {           /* repeat TIMES times */
 366             list_for_each(e, instruction->eops) {
 367                 if (e->type == EOT_DB_NUMBER) {
 368                     if (wsize == 1) {
 369                         if (e->segment != NO_SEG)
 370                             errfunc(ERR_NONFATAL,
 371                                     "one-byte relocation attempted");
 372                         else {
 373                             uint8_t out_byte = e->offset;
 374                             out(offset, segment, &out_byte,
 375                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 376                         }
 377                     } else if (wsize > 8) {
 378                         errfunc(ERR_NONFATAL,
 379                                 "integer supplied to a DT, DO or DY"
 380                                 " instruction");
 381                     } else
 382                         out(offset, segment, &e->offset,
 383                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 384                     offset += wsize;
 385                 } else if (e->type == EOT_DB_STRING ||
 386                            e->type == EOT_DB_STRING_FREE) {
 387                     int align;
 388
 389                     out(offset, segment, e->stringval,
 390                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 391                     align = e->stringlen % wsize;
 392
 393                     if (align) {
 394                         align = wsize - align;
 395                         out(offset, segment, zero_buffer,
 396                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 397                     }
 398                     offset += e->stringlen + align;
 399                 }
 400             }
 401             if (t > 0 && t == instruction->times - 1) {
 402                 /*
 403                  * Dummy call to list->output to give the offset to the
 404                  * listing module.
 405                  */
 406                 list->output(offset, NULL, OUT_RAWDATA, 0);
 407                 list->uplevel(LIST_TIMES);
 408             }
 409         }
 410         if (instruction->times > 1)
 411             list->downlevel(LIST_TIMES);
 412         return offset - start;
 413     }
 414
 415     if (instruction->opcode == I_INCBIN) {
 416         const char *fname = instruction->eops->stringval;
 417         FILE *fp;
 418
 419         fp = fopen(fname, "rb");
 420         if (!fp) {
 421             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 422                   fname);
 423         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 424             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 425                   fname);
 426         } else {
 427             static char buf[4096];
 428             size_t t = instruction->times;
 429             size_t base = 0;
 430             size_t len;
 431
 432             len = ftell(fp);
 433             if (instruction->eops->next) {
 434                 base = instruction->eops->next->offset;
 435                 len -= base;
 436                 if (instruction->eops->next->next &&
 437                     len > (size_t)instruction->eops->next->next->offset)
 438                     len = (size_t)instruction->eops->next->next->offset;
 439             }
 440             /*
 441              * Dummy call to list->output to give the offset to the
 442              * listing module.
 443              */
 444             list->output(offset, NULL, OUT_RAWDATA, 0);
 445             list->uplevel(LIST_INCBIN);
 446             while (t--) {
 447                 size_t l;
 448
 449                 fseek(fp, base, SEEK_SET);
 450                 l = len;
 451                 while (l > 0) {
 452                     int32_t m;
 453                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 454                     if (!m) {
 455                         /*
 456                          * This shouldn't happen unless the file
 457                          * actually changes while we are reading
 458                          * it.
 459                          */
 460                         error(ERR_NONFATAL,
 461                               "`incbin': unexpected EOF while"
 462                               " reading file `%s'", fname);
 463                         t = 0;  /* Try to exit cleanly */
 464                         break;
 465                     }
 466                     out(offset, segment, buf, OUT_RAWDATA, m,
 467                         NO_SEG, NO_SEG);
 468                     l -= m;
 469                 }
 470             }
 471             list->downlevel(LIST_INCBIN);
 472             if (instruction->times > 1) {
 473                 /*
 474                  * Dummy call to list->output to give the offset to the
 475                  * listing module.
 476                  */
 477                 list->output(offset, NULL, OUT_RAWDATA, 0);
 478                 list->uplevel(LIST_TIMES);
 479                 list->downlevel(LIST_TIMES);
 480             }
 481             fclose(fp);
 482             return instruction->times * len;
 483         }
 484         return 0;               /* if we're here, there's an error */
 485     }
 486
 487     /* Check to see if we need an address-size prefix */
 488     add_asp(instruction, bits);
 489
 490     m = find_match(&temp, instruction, segment, offset, bits);
 491
 492     if (m == MOK_GOOD) {
 493         /* Matches! */
 494         int64_t insn_size = calcsize(segment, offset, bits,
 495                                      instruction, temp->code);
 496         itimes = instruction->times;
 497         if (insn_size < 0)  /* shouldn't be, on pass two */
 498             error(ERR_PANIC, "errors made it through from pass one");
 499         else
 500             while (itimes--) {
 501                 for (j = 0; j < MAXPREFIX; j++) {
 502                     uint8_t c = 0;
 503                     switch (instruction->prefixes[j]) {
 504                     case P_WAIT:
 505                         c = 0x9B;
 506                         break;
 507                     case P_LOCK:
 508                         c = 0xF0;
 509                         break;
 510                     case P_REPNE:
 511                     case P_REPNZ:
 512                         c = 0xF2;
 513                         break;
 514                     case P_REPE:
 515                     case P_REPZ:
 516                     case P_REP:
 517                         c = 0xF3;
 518                         break;
 519                     case R_CS:
 520                         if (bits == 64) {
 521                             error(ERR_WARNING | ERR_PASS2,
 522                                   "cs segment base generated, but will be ignored in 64-bit mode");
 523                         }
 524                         c = 0x2E;
 525                         break;
 526                     case R_DS:
 527                         if (bits == 64) {
 528                             error(ERR_WARNING | ERR_PASS2,
 529                                   "ds segment base generated, but will be ignored in 64-bit mode");
 530                         }
 531                         c = 0x3E;
 532                         break;
 533                     case R_ES:
 534                         if (bits == 64) {
 535                             error(ERR_WARNING | ERR_PASS2,
 536                                   "es segment base generated, but will be ignored in 64-bit mode");
 537                         }
 538                         c = 0x26;
 539                         break;
 540                     case R_FS:
 541                         c = 0x64;
 542                         break;
 543                     case R_GS:
 544                         c = 0x65;
 545                         break;
 546                     case R_SS:
 547                         if (bits == 64) {
 548                             error(ERR_WARNING | ERR_PASS2,
 549                                   "ss segment base generated, but will be ignored in 64-bit mode");
 550                         }
 551                         c = 0x36;
 552                         break;
 553                     case R_SEGR6:
 554                     case R_SEGR7:
 555                         error(ERR_NONFATAL,
 556                               "segr6 and segr7 cannot be used as prefixes");
 557                         break;
 558                     case P_A16:
 559                         if (bits == 64) {
 560                             error(ERR_NONFATAL,
 561                                   "16-bit addressing is not supported "
 562                                   "in 64-bit mode");
 563                         } else if (bits != 16)
 564                             c = 0x67;
 565                         break;
 566                     case P_A32:
 567                         if (bits != 32)
 568                             c = 0x67;
 569                         break;
 570                     case P_A64:
 571                         if (bits != 64) {
 572                             error(ERR_NONFATAL,
 573                                   "64-bit addressing is only supported "
 574                                   "in 64-bit mode");
 575                         }
 576                         break;
 577                     case P_ASP:
 578                         c = 0x67;
 579                         break;
 580                     case P_O16:
 581                         if (bits != 16)
 582                             c = 0x66;
 583                         break;
 584                     case P_O32:
 585                         if (bits == 16)
 586                             c = 0x66;
 587                         break;
 588                     case P_O64:
 589                         /* REX.W */
 590                         break;
 591                     case P_OSP:
 592                         c = 0x66;
 593                         break;
 594                     case P_none:
 595                         break;
 596                     default:
 597                         error(ERR_PANIC, "invalid instruction prefix");
 598                     }
 599                     if (c != 0) {
 600                         out(offset, segment, &c, OUT_RAWDATA, 1,
 601                             NO_SEG, NO_SEG);
 602                         offset++;
 603                     }
 604                 }
 605                 insn_end = offset + insn_size;
 606                 gencode(segment, offset, bits, instruction,
 607                         temp, insn_end);
 608                 offset += insn_size;
 609                 if (itimes > 0 && itimes == instruction->times - 1) {
 610                     /*
 611                      * Dummy call to list->output to give the offset to the
 612                      * listing module.
 613                      */
 614                     list->output(offset, NULL, OUT_RAWDATA, 0);
 615                     list->uplevel(LIST_TIMES);
 616                 }
 617             }
 618         if (instruction->times > 1)
 619             list->downlevel(LIST_TIMES);
 620         return offset - start;
 621     } else {
 622         /* No match */
 623         switch (m) {
 624         case MERR_OPSIZEMISSING:
 625             error(ERR_NONFATAL, "operation size not specified");
 626             break;
 627         case MERR_OPSIZEMISMATCH:
 628             error(ERR_NONFATAL, "mismatch in operand sizes");
 629             break;
 630         case MERR_BADCPU:
 631             error(ERR_NONFATAL, "no instruction for this cpu level");
 632             break;
 633         case MERR_BADMODE:
 634             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 635                   bits);
 636             break;
 637         default:
 638             error(ERR_NONFATAL,
 639                   "invalid combination of opcode and operands");
 640             break;
 641         }
 642     }
 643     return 0;
 644 }
 645
 646 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 647                   insn * instruction, efunc error)
 648 {
 649     const struct itemplate *temp;
 650     enum match_result m;
 651
 652     errfunc = error;            /* to pass to other functions */
 653     cpu = cp;
 654
 655     if (instruction->opcode == I_none)
 656         return 0;
 657
 658     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 659         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 660         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 661         instruction->opcode == I_DY) {
 662         extop *e;
 663         int32_t isize, osize, wsize;
 664
 665         isize = 0;
 666         wsize = idata_bytes(instruction->opcode);
 667
 668         list_for_each(e, instruction->eops) {
 669             int32_t align;
 670
 671             osize = 0;
 672             if (e->type == EOT_DB_NUMBER) {
 673                 osize = 1;
 674                 warn_overflow_const(e->offset, wsize);
 675             } else if (e->type == EOT_DB_STRING ||
 676                        e->type == EOT_DB_STRING_FREE)
 677                 osize = e->stringlen;
 678
 679             align = (-osize) % wsize;
 680             if (align < 0)
 681                 align += wsize;
 682             isize += osize + align;
 683         }
 684         return isize * instruction->times;
 685     }
 686
 687     if (instruction->opcode == I_INCBIN) {
 688         const char *fname = instruction->eops->stringval;
 689         FILE *fp;
 690         int64_t val = 0;
 691         size_t len;
 692
 693         fp = fopen(fname, "rb");
 694         if (!fp)
 695             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 696                   fname);
 697         else if (fseek(fp, 0L, SEEK_END) < 0)
 698             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 699                   fname);
 700         else {
 701             len = ftell(fp);
 702             if (instruction->eops->next) {
 703                 len -= instruction->eops->next->offset;
 704                 if (instruction->eops->next->next &&
 705                     len > (size_t)instruction->eops->next->next->offset) {
 706                     len = (size_t)instruction->eops->next->next->offset;
 707                 }
 708             }
 709             val = instruction->times * len;
 710         }
 711         if (fp)
 712             fclose(fp);
 713         return val;
 714     }
 715
 716     /* Check to see if we need an address-size prefix */
 717     add_asp(instruction, bits);
 718
 719     m = find_match(&temp, instruction, segment, offset, bits);
 720     if (m == MOK_GOOD) {
 721         /* we've matched an instruction. */
 722         int64_t isize;
 723         const uint8_t *codes = temp->code;
 724         int j;
 725
 726         isize = calcsize(segment, offset, bits, instruction, codes);
 727         if (isize < 0)
 728             return -1;
 729         for (j = 0; j < MAXPREFIX; j++) {
 730             switch (instruction->prefixes[j]) {
 731             case P_A16:
 732                 if (bits != 16)
 733                     isize++;
 734                 break;
 735             case P_A32:
 736                 if (bits != 32)
 737                     isize++;
 738                 break;
 739             case P_O16:
 740                 if (bits != 16)
 741                     isize++;
 742                 break;
 743             case P_O32:
 744                 if (bits == 16)
 745                     isize++;
 746                 break;
 747             case P_A64:
 748             case P_O64:
 749             case P_none:
 750                 break;
 751             default:
 752                 isize++;
 753                 break;
 754             }
 755         }
 756         return isize * instruction->times;
 757     } else {
 758         return -1;                  /* didn't match any instruction */
 759     }
 760 }
 761
 762 static bool possible_sbyte(operand *o)
 763 {
 764     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 765         !(o->opflags & OPFLAG_UNKNOWN) &&
 766         optimizing >= 0 && !(o->type & STRICT);
 767 }
 768
 769 /* check that opn[op]  is a signed byte of size 16 or 32 */
 770 static bool is_sbyte16(operand *o)
 771 {
 772     int16_t v;
 773
 774     if (!possible_sbyte(o))
 775         return false;
 776
 777     v = o->offset;
 778     return v >= -128 && v <= 127;
 779 }
 780
 781 static bool is_sbyte32(operand *o)
 782 {
 783     int32_t v;
 784
 785     if (!possible_sbyte(o))
 786         return false;
 787
 788     v = o->offset;
 789     return v >= -128 && v <= 127;
 790 }
 791
 792 /* Common construct */
 793 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 794
 795 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 796                         insn * ins, const uint8_t *codes)
 797 {
 798     int64_t length = 0;
 799     uint8_t c;
 800     int rex_mask = ~0;
 801     int op1, op2;
 802     struct operand *opx;
 803     uint8_t opex = 0;
 804
 805     ins->rex = 0;               /* Ensure REX is reset */
 806
 807     if (ins->prefixes[PPS_OSIZE] == P_O64)
 808         ins->rex |= REX_W;
 809
 810     (void)segment;              /* Don't warn that this parameter is unused */
 811     (void)offset;               /* Don't warn that this parameter is unused */
 812
 813     while (*codes) {
 814         c = *codes++;
 815         op1 = (c & 3) + ((opex & 1) << 2);
 816         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 817         opx = &ins->oprs[op1];
 818         opex = 0;               /* For the next iteration */
 819
 820         switch (c) {
 821         case 01:
 822         case 02:
 823         case 03:
 824         case 04:
 825             codes += c, length += c;
 826             break;
 827
 828         case 05:
 829         case 06:
 830         case 07:
 831             opex = c;
 832             break;
 833
 834         case4(010):
 835             ins->rex |=
 836                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 837             codes++, length++;
 838             break;
 839
 840         case4(014):
 841         case4(020):
 842         case4(024):
 843             length++;
 844             break;
 845
 846         case4(030):
 847             length += 2;
 848             break;
 849
 850         case4(034):
 851             if (opx->type & (BITS16 | BITS32 | BITS64))
 852                 length += (opx->type & BITS16) ? 2 : 4;
 853             else
 854                 length += (bits == 16) ? 2 : 4;
 855             break;
 856
 857         case4(040):
 858             length += 4;
 859             break;
 860
 861         case4(044):
 862             length += ins->addr_size >> 3;
 863             break;
 864
 865         case4(050):
 866             length++;
 867             break;
 868
 869         case4(054):
 870             length += 8; /* MOV reg64/imm */
 871             break;
 872
 873         case4(060):
 874             length += 2;
 875             break;
 876
 877         case4(064):
 878             if (opx->type & (BITS16 | BITS32 | BITS64))
 879                 length += (opx->type & BITS16) ? 2 : 4;
 880             else
 881                 length += (bits == 16) ? 2 : 4;
 882             break;
 883
 884         case4(070):
 885             length += 4;
 886             break;
 887
 888         case4(074):
 889             length += 2;
 890             break;
 891
 892         case4(0140):
 893             length += is_sbyte16(opx) ? 1 : 2;
 894             break;
 895
 896         case4(0144):
 897             codes++;
 898             length++;
 899             break;
 900
 901         case4(0150):
 902             length += is_sbyte32(opx) ? 1 : 4;
 903             break;
 904
 905         case4(0154):
 906             codes++;
 907             length++;
 908             break;
 909
 910         case4(0160):
 911             length++;
 912             ins->rex |= REX_D;
 913             ins->drexdst = regval(opx);
 914             break;
 915
 916         case4(0164):
 917             length++;
 918             ins->rex |= REX_D|REX_OC;
 919             ins->drexdst = regval(opx);
 920             break;
 921
 922         case 0171:
 923             break;
 924
 925         case 0172:
 926         case 0173:
 927         case 0174:
 928             codes++;
 929             length++;
 930             break;
 931
 932         case4(0250):
 933             length += is_sbyte32(opx) ? 1 : 4;
 934             break;
 935
 936         case4(0254):
 937             length += 4;
 938             break;
 939
 940         case4(0260):
 941             ins->rex |= REX_V;
 942             ins->drexdst = regval(opx);
 943             ins->vex_cm = *codes++;
 944             ins->vex_wlp = *codes++;
 945             break;
 946
 947         case 0270:
 948             ins->rex |= REX_V;
 949             ins->drexdst = 0;
 950             ins->vex_cm = *codes++;
 951             ins->vex_wlp = *codes++;
 952             break;
 953
 954         case4(0274):
 955             length++;
 956             break;
 957
 958         case4(0300):
 959             break;
 960
 961         case 0310:
 962             if (bits == 64)
 963                 return -1;
 964             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 965             break;
 966
 967         case 0311:
 968             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 969             break;
 970
 971         case 0312:
 972             break;
 973
 974         case 0313:
 975             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 976                 has_prefix(ins, PPS_ASIZE, P_A32))
 977                 return -1;
 978             break;
 979
 980         case4(0314):
 981             break;
 982
 983         case 0320:
 984             length += (bits != 16);
 985             break;
 986
 987         case 0321:
 988             length += (bits == 16);
 989             break;
 990
 991         case 0322:
 992             break;
 993
 994         case 0323:
 995             rex_mask &= ~REX_W;
 996             break;
 997
 998         case 0324:
 999             ins->rex |= REX_W;
1000             break;
1001
1002         case 0325:
1003             ins->rex |= REX_NH;
1004             break;
1005
1006         case 0330:
1007             codes++, length++;
1008             break;
1009
1010         case 0331:
1011             break;
1012
1013         case 0332:
1014         case 0333:
1015             length++;
1016             break;
1017
1018         case 0334:
1019             ins->rex |= REX_L;
1020             break;
1021
1022         case 0335:
1023             break;
1024
1025         case 0336:
1026             if (!ins->prefixes[PPS_LREP])
1027                 ins->prefixes[PPS_LREP] = P_REP;
1028             break;
1029
1030         case 0337:
1031             if (!ins->prefixes[PPS_LREP])
1032                 ins->prefixes[PPS_LREP] = P_REPNE;
1033             break;
1034
1035         case 0340:
1036             if (ins->oprs[0].segment != NO_SEG)
1037                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1038                         " quantity of BSS space");
1039             else
1040                 length += ins->oprs[0].offset;
1041             break;
1042
1043         case 0341:
1044             if (!ins->prefixes[PPS_WAIT])
1045                 ins->prefixes[PPS_WAIT] = P_WAIT;
1046             break;
1047
1048         case4(0344):
1049             length++;
1050             break;
1051
1052         case 0360:
1053             break;
1054
1055         case 0361:
1056         case 0362:
1057         case 0363:
1058             length++;
1059             break;
1060
1061         case 0364:
1062         case 0365:
1063             break;
1064
1065         case 0366:
1066         case 0367:
1067             length++;
1068             break;
1069
1070         case 0370:
1071         case 0371:
1072         case 0372:
1073             break;
1074
1075         case 0373:
1076             length++;
1077             break;
1078
1079         case4(0100):
1080         case4(0110):
1081         case4(0120):
1082         case4(0130):
1083         case4(0200):
1084         case4(0204):
1085         case4(0210):
1086         case4(0214):
1087         case4(0220):
1088         case4(0224):
1089         case4(0230):
1090         case4(0234):
1091             {
1092                 ea ea_data;
1093                 int rfield;
1094                 opflags_t rflags;
1095                 struct operand *opy = &ins->oprs[op2];
1096
1097                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1098
1099                 if (c <= 0177) {
1100                     /* pick rfield from operand b (opx) */
1101                     rflags = regflag(opx);
1102                     rfield = nasm_regvals[opx->basereg];
1103                 } else {
1104                     rflags = 0;
1105                     rfield = c & 7;
1106                 }
1107                 if (!process_ea(opy, &ea_data, bits,
1108                                 ins->addr_size, rfield, rflags)) {
1109                     errfunc(ERR_NONFATAL, "invalid effective address");
1110                     return -1;
1111                 } else {
1112                     ins->rex |= ea_data.rex;
1113                     length += ea_data.size;
1114                 }
1115             }
1116             break;
1117
1118         default:
1119             errfunc(ERR_PANIC, "internal instruction table corrupt"
1120                     ": instruction code \\%o (0x%02X) given", c, c);
1121             break;
1122         }
1123     }
1124
1125     ins->rex &= rex_mask;
1126
1127     if (ins->rex & REX_NH) {
1128         if (ins->rex & REX_H) {
1129             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1130             return -1;
1131         }
1132         ins->rex &= ~REX_P;     /* Don't force REX prefix due to high reg */
1133     }
1134
1135     if (ins->rex & REX_V) {
1136         int bad32 = REX_R|REX_W|REX_X|REX_B;
1137
1138         if (ins->rex & REX_H) {
1139             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1140             return -1;
1141         }
1142         switch (ins->vex_wlp & 030) {
1143         case 000:
1144         case 020:
1145             ins->rex &= ~REX_W;
1146             break;
1147         case 010:
1148             ins->rex |= REX_W;
1149             bad32 &= ~REX_W;
1150             break;
1151         case 030:
1152             /* Follow REX_W */
1153             break;
1154         }
1155
1156         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1157             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1158             return -1;
1159         }
1160         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1161             length += 3;
1162         else
1163             length += 2;
1164     } else if (ins->rex & REX_D) {
1165         if (ins->rex & REX_H) {
1166             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1167             return -1;
1168         }
1169         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1170                            ins->drexdst > 7)) {
1171             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1172             return -1;
1173         }
1174         length++;
1175     } else if (ins->rex & REX_REAL) {
1176         if (ins->rex & REX_H) {
1177             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1178             return -1;
1179         } else if (bits == 64) {
1180             length++;
1181         } else if ((ins->rex & REX_L) &&
1182                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1183                    cpu >= IF_X86_64) {
1184             /* LOCK-as-REX.R */
1185             assert_no_prefix(ins, PPS_LREP);
1186             length++;
1187         } else {
1188             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1189             return -1;
1190         }
1191     }
1192
1193     return length;
1194 }
1195
/*
 * Emit a pending REX prefix byte, if one is needed.
 *
 * Expands in a context that provides `ins', `bits', `offset' and
 * `segment'.  A byte is written only in 64-bit mode, when the
 * instruction is not DREX/VEX encoded and at least one REX_REAL bit is
 * set; ins->rex is then cleared so the prefix is emitted at most once,
 * and `offset' is advanced by one.
 *
 * Wrapped in do { } while (0) so that "EMIT_REX();" is a single
 * statement and is safe inside an unbraced if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&     \
            (bits == 64)) {                                             \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1203
1204 static void gencode(int32_t segment, int64_t offset, int bits,
1205                     insn * ins, const struct itemplate *temp,
1206                     int64_t insn_end)
1207 {
1208     static char condval[] = {   /* conditional opcodes */
1209         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1210         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1211         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1212     };
1213     uint8_t c;
1214     uint8_t bytes[4];
1215     int64_t size;
1216     int64_t data;
1217     int op1, op2;
1218     struct operand *opx;
1219     const uint8_t *codes = temp->code;
1220     uint8_t opex = 0;
1221
1222     while (*codes) {
1223         c = *codes++;
1224         op1 = (c & 3) + ((opex & 1) << 2);
1225         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1226         opx = &ins->oprs[op1];
1227         opex = 0;               /* For the next iteration */
1228
1229         switch (c) {
1230         case 01:
1231         case 02:
1232         case 03:
1233         case 04:
1234             EMIT_REX();
1235             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1236             codes += c;
1237             offset += c;
1238             break;
1239
1240         case 05:
1241         case 06:
1242         case 07:
1243             opex = c;
1244             break;
1245
1246         case4(010):
1247             EMIT_REX();
1248             bytes[0] = *codes++ + (regval(opx) & 7);
1249             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1250             offset += 1;
1251             break;
1252
1253         case4(014):
1254             /* The test for BITS8 and SBYTE here is intended to avoid
1255                warning on optimizer actions due to SBYTE, while still
1256                warn on explicit BYTE directives.  Also warn, obviously,
1257                if the optimizer isn't enabled. */
1258             if (((opx->type & BITS8) ||
1259                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1260                 (opx->offset < -128 || opx->offset > 127)) {
1261                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1262                         "signed byte value exceeds bounds");
1263             }
1264             if (opx->segment != NO_SEG) {
1265                 data = opx->offset;
1266                 out(offset, segment, &data, OUT_ADDRESS, 1,
1267                     opx->segment, opx->wrt);
1268             } else {
1269                 bytes[0] = opx->offset;
1270                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1271                     NO_SEG);
1272             }
1273             offset += 1;
1274             break;
1275
1276         case4(020):
1277             if (opx->offset < -256 || opx->offset > 255) {
1278                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1279                         "byte value exceeds bounds");
1280             }
1281             if (opx->segment != NO_SEG) {
1282                 data = opx->offset;
1283                 out(offset, segment, &data, OUT_ADDRESS, 1,
1284                     opx->segment, opx->wrt);
1285             } else {
1286                 bytes[0] = opx->offset;
1287                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1288                     NO_SEG);
1289             }
1290             offset += 1;
1291             break;
1292
1293         case4(024):
1294             if (opx->offset < 0 || opx->offset > 255)
1295                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1296                         "unsigned byte value exceeds bounds");
1297             if (opx->segment != NO_SEG) {
1298                 data = opx->offset;
1299                 out(offset, segment, &data, OUT_ADDRESS, 1,
1300                     opx->segment, opx->wrt);
1301             } else {
1302                 bytes[0] = opx->offset;
1303                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1304                     NO_SEG);
1305             }
1306             offset += 1;
1307             break;
1308
1309         case4(030):
1310             warn_overflow_opd(opx, 2);
1311             data = opx->offset;
1312             out(offset, segment, &data, OUT_ADDRESS, 2,
1313                 opx->segment, opx->wrt);
1314             offset += 2;
1315             break;
1316
1317         case4(034):
1318             if (opx->type & (BITS16 | BITS32))
1319                 size = (opx->type & BITS16) ? 2 : 4;
1320             else
1321                 size = (bits == 16) ? 2 : 4;
1322             warn_overflow_opd(opx, size);
1323             data = opx->offset;
1324             out(offset, segment, &data, OUT_ADDRESS, size,
1325                 opx->segment, opx->wrt);
1326             offset += size;
1327             break;
1328
1329         case4(040):
1330             warn_overflow_opd(opx, 4);
1331             data = opx->offset;
1332             out(offset, segment, &data, OUT_ADDRESS, 4,
1333                 opx->segment, opx->wrt);
1334             offset += 4;
1335             break;
1336
1337         case4(044):
1338             data = opx->offset;
1339             size = ins->addr_size >> 3;
1340             warn_overflow_opd(opx, size);
1341             out(offset, segment, &data, OUT_ADDRESS, size,
1342                 opx->segment, opx->wrt);
1343             offset += size;
1344             break;
1345
1346         case4(050):
1347             if (opx->segment != segment)
1348                 errfunc(ERR_NONFATAL,
1349                         "short relative jump outside segment");
1350             data = opx->offset - insn_end;
1351             if (data > 127 || data < -128)
1352                 errfunc(ERR_NONFATAL, "short jump is out of range");
1353             bytes[0] = data;
1354             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1355             offset += 1;
1356             break;
1357
1358         case4(054):
1359             data = (int64_t)opx->offset;
1360             out(offset, segment, &data, OUT_ADDRESS, 8,
1361                 opx->segment, opx->wrt);
1362             offset += 8;
1363             break;
1364
1365         case4(060):
1366             if (opx->segment != segment) {
1367                 data = opx->offset;
1368                 out(offset, segment, &data,
1369                     OUT_REL2ADR, insn_end - offset,
1370                     opx->segment, opx->wrt);
1371             } else {
1372                 data = opx->offset - insn_end;
1373                 out(offset, segment, &data,
1374                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1375             }
1376             offset += 2;
1377             break;
1378
1379         case4(064):
1380             if (opx->type & (BITS16 | BITS32 | BITS64))
1381                 size = (opx->type & BITS16) ? 2 : 4;
1382             else
1383                 size = (bits == 16) ? 2 : 4;
1384             if (opx->segment != segment) {
1385                 data = opx->offset;
1386                 out(offset, segment, &data,
1387                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1388                     insn_end - offset, opx->segment, opx->wrt);
1389             } else {
1390                 data = opx->offset - insn_end;
1391                 out(offset, segment, &data,
1392                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1393             }
1394             offset += size;
1395             break;
1396
1397         case4(070):
1398             if (opx->segment != segment) {
1399                 data = opx->offset;
1400                 out(offset, segment, &data,
1401                     OUT_REL4ADR, insn_end - offset,
1402                     opx->segment, opx->wrt);
1403             } else {
1404                 data = opx->offset - insn_end;
1405                 out(offset, segment, &data,
1406                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1407             }
1408             offset += 4;
1409             break;
1410
1411         case4(074):
1412             if (opx->segment == NO_SEG)
1413                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1414                         " relocatable");
1415             data = 0;
1416             out(offset, segment, &data, OUT_ADDRESS, 2,
1417                 outfmt->segbase(1 + opx->segment),
1418                 opx->wrt);
1419             offset += 2;
1420             break;
1421
1422         case4(0140):
1423             data = opx->offset;
1424             warn_overflow_opd(opx, 2);
1425             if (is_sbyte16(opx)) {
1426                 bytes[0] = data;
1427                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1428                     NO_SEG);
1429                 offset++;
1430             } else {
1431                 out(offset, segment, &data, OUT_ADDRESS, 2,
1432                     opx->segment, opx->wrt);
1433                 offset += 2;
1434             }
1435             break;
1436
1437         case4(0144):
1438             EMIT_REX();
1439             bytes[0] = *codes++;
1440             if (is_sbyte16(opx))
1441                 bytes[0] |= 2;  /* s-bit */
1442             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1443             offset++;
1444             break;
1445
1446         case4(0150):
1447             data = opx->offset;
1448             warn_overflow_opd(opx, 4);
1449             if (is_sbyte32(opx)) {
1450                 bytes[0] = data;
1451                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1452                     NO_SEG);
1453                 offset++;
1454             } else {
1455                 out(offset, segment, &data, OUT_ADDRESS, 4,
1456                     opx->segment, opx->wrt);
1457                 offset += 4;
1458             }
1459             break;
1460
1461         case4(0154):
1462             EMIT_REX();
1463             bytes[0] = *codes++;
1464             if (is_sbyte32(opx))
1465                 bytes[0] |= 2;  /* s-bit */
1466             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1467             offset++;
1468             break;
1469
1470         case4(0160):
1471         case4(0164):
1472             break;
1473
1474         case 0171:
1475             bytes[0] =
1476                 (ins->drexdst << 4) |
1477                 (ins->rex & REX_OC ? 0x08 : 0) |
1478                 (ins->rex & (REX_R|REX_X|REX_B));
1479             ins->rex = 0;
1480             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1481             offset++;
1482             break;
1483
1484         case 0172:
1485             c = *codes++;
1486             opx = &ins->oprs[c >> 3];
1487             bytes[0] = nasm_regvals[opx->basereg] << 4;
1488             opx = &ins->oprs[c & 7];
1489             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1490                 errfunc(ERR_NONFATAL,
1491                         "non-absolute expression not permitted as argument %d",
1492                         c & 7);
1493             } else {
1494                 if (opx->offset & ~15) {
1495                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1496                             "four-bit argument exceeds bounds");
1497                 }
1498                 bytes[0] |= opx->offset & 15;
1499             }
1500             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1501             offset++;
1502             break;
1503
1504         case 0173:
1505             c = *codes++;
1506             opx = &ins->oprs[c >> 4];
1507             bytes[0] = nasm_regvals[opx->basereg] << 4;
1508             bytes[0] |= c & 15;
1509             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1510             offset++;
1511             break;
1512
1513         case 0174:
1514             c = *codes++;
1515             opx = &ins->oprs[c];
1516             bytes[0] = nasm_regvals[opx->basereg] << 4;
1517             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1518             offset++;
1519             break;
1520
1521         case4(0250):
1522             data = opx->offset;
1523             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1524                 (int32_t)data != (int64_t)data) {
1525                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1526                         "signed dword immediate exceeds bounds");
1527             }
1528             if (is_sbyte32(opx)) {
1529                 bytes[0] = data;
1530                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1531                     NO_SEG);
1532                 offset++;
1533             } else {
1534                 out(offset, segment, &data, OUT_ADDRESS, 4,
1535                     opx->segment, opx->wrt);
1536                 offset += 4;
1537             }
1538             break;
1539
1540         case4(0254):
1541             data = opx->offset;
1542             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1543                 (int32_t)data != (int64_t)data) {
1544                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1545                         "signed dword immediate exceeds bounds");
1546             }
1547             out(offset, segment, &data, OUT_ADDRESS, 4,
1548                 opx->segment, opx->wrt);
1549             offset += 4;
1550             break;
1551
1552         case4(0260):
1553         case 0270:
1554             codes += 2;
1555             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1556                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1557                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1558                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1559                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1560                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1561                 offset += 3;
1562             } else {
1563                 bytes[0] = 0xc5;
1564                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1565                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1566                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1567                 offset += 2;
1568             }
1569             break;
1570
1571         case4(0274):
1572         {
1573             uint64_t uv, um;
1574             int s;
1575
1576             if (ins->rex & REX_W)
1577                 s = 64;
1578             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1579                 s = 16;
1580             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1581                 s = 32;
1582             else
1583                 s = bits;
1584
1585             um = (uint64_t)2 << (s-1);
1586             uv = opx->offset;
1587
1588             if (uv > 127 && uv < (uint64_t)-128 &&
1589                 (uv < um-128 || uv > um-1)) {
1590                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1591                         "signed byte value exceeds bounds");
1592             }
1593             if (opx->segment != NO_SEG) {
1594                 data = uv;
1595                 out(offset, segment, &data, OUT_ADDRESS, 1,
1596                     opx->segment, opx->wrt);
1597             } else {
1598                 bytes[0] = uv;
1599                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1600                     NO_SEG);
1601             }
1602             offset += 1;
1603             break;
1604         }
1605
1606         case4(0300):
1607             break;
1608
1609         case 0310:
1610             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1611                 *bytes = 0x67;
1612                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1613                 offset += 1;
1614             } else
1615                 offset += 0;
1616             break;
1617
1618         case 0311:
1619             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1620                 *bytes = 0x67;
1621                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1622                 offset += 1;
1623             } else
1624                 offset += 0;
1625             break;
1626
1627         case 0312:
1628             break;
1629
1630         case 0313:
1631             ins->rex = 0;
1632             break;
1633
1634         case4(0314):
1635             break;
1636
1637         case 0320:
1638             if (bits != 16) {
1639                 *bytes = 0x66;
1640                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1641                 offset += 1;
1642             } else
1643                 offset += 0;
1644             break;
1645
1646         case 0321:
1647             if (bits == 16) {
1648                 *bytes = 0x66;
1649                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1650                 offset += 1;
1651             } else
1652                 offset += 0;
1653             break;
1654
1655         case 0322:
1656         case 0323:
1657             break;
1658
1659         case 0324:
1660             ins->rex |= REX_W;
1661             break;
1662
1663         case 0325:
1664             break;
1665
1666         case 0330:
1667             *bytes = *codes++ ^ condval[ins->condition];
1668             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1669             offset += 1;
1670             break;
1671
1672         case 0331:
1673             break;
1674
1675         case 0332:
1676         case 0333:
1677             *bytes = c - 0332 + 0xF2;
1678             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1679             offset += 1;
1680             break;
1681
1682         case 0334:
1683             if (ins->rex & REX_R) {
1684                 *bytes = 0xF0;
1685                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1686                 offset += 1;
1687             }
1688             ins->rex &= ~(REX_L|REX_R);
1689             break;
1690
1691         case 0335:
1692             break;
1693
1694         case 0336:
1695         case 0337:
1696             break;
1697
1698         case 0340:
1699             if (ins->oprs[0].segment != NO_SEG)
1700                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1701             else {
1702                 int64_t size = ins->oprs[0].offset;
1703                 if (size > 0)
1704                     out(offset, segment, NULL,
1705                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1706                 offset += size;
1707             }
1708             break;
1709
1710         case 0341:
1711             break;
1712
1713         case 0344:
1714         case 0345:
1715             bytes[0] = c & 1;
1716             switch (ins->oprs[0].basereg) {
1717             case R_CS:
1718                 bytes[0] += 0x0E;
1719                 break;
1720             case R_DS:
1721                 bytes[0] += 0x1E;
1722                 break;
1723             case R_ES:
1724                 bytes[0] += 0x06;
1725                 break;
1726             case R_SS:
1727                 bytes[0] += 0x16;
1728                 break;
1729             default:
1730                 errfunc(ERR_PANIC,
1731                         "bizarre 8086 segment register received");
1732             }
1733             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1734             offset++;
1735             break;
1736
1737         case 0346:
1738         case 0347:
1739             bytes[0] = c & 1;
1740             switch (ins->oprs[0].basereg) {
1741             case R_FS:
1742                 bytes[0] += 0xA0;
1743                 break;
1744             case R_GS:
1745                 bytes[0] += 0xA8;
1746                 break;
1747             default:
1748                 errfunc(ERR_PANIC,
1749                         "bizarre 386 segment register received");
1750             }
1751             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1752             offset++;
1753             break;
1754
1755         case 0360:
1756             break;
1757
1758         case 0361:
1759             bytes[0] = 0x66;
1760             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1761             offset += 1;
1762             break;
1763
1764         case 0362:
1765         case 0363:
1766             bytes[0] = c - 0362 + 0xf2;
1767             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1768             offset += 1;
1769             break;
1770
1771         case 0364:
1772         case 0365:
1773             break;
1774
1775         case 0366:
1776         case 0367:
1777             *bytes = c - 0366 + 0x66;
1778             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1779             offset += 1;
1780             break;
1781
1782         case 0370:
1783         case 0371:
1784         case 0372:
1785             break;
1786
1787         case 0373:
1788             *bytes = bits == 16 ? 3 : 5;
1789             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1790             offset += 1;
1791             break;
1792
1793         case4(0100):
1794         case4(0110):
1795         case4(0120):
1796         case4(0130):
1797         case4(0200):
1798         case4(0204):
1799         case4(0210):
1800         case4(0214):
1801         case4(0220):
1802         case4(0224):
1803         case4(0230):
1804         case4(0234):
1805             {
1806                 ea ea_data;
1807                 int rfield;
1808                 opflags_t rflags;
1809                 uint8_t *p;
1810                 int32_t s;
1811                 enum out_type type;
1812                 struct operand *opy = &ins->oprs[op2];
1813
1814                 if (c <= 0177) {
1815                     /* pick rfield from operand b (opx) */
1816                     rflags = regflag(opx);
1817                     rfield = nasm_regvals[opx->basereg];
1818                 } else {
1819                     /* rfield is constant */
1820                     rflags = 0;
1821                     rfield = c & 7;
1822                 }
1823
1824                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1825                                 rfield, rflags)) {
1826                     errfunc(ERR_NONFATAL, "invalid effective address");
1827                 }
1828
1829
1830                 p = bytes;
1831                 *p++ = ea_data.modrm;
1832                 if (ea_data.sib_present)
1833                     *p++ = ea_data.sib;
1834
1835                 /* DREX suffixes come between the SIB and the displacement */
1836                 if (ins->rex & REX_D) {
1837                     *p++ = (ins->drexdst << 4) |
1838                            (ins->rex & REX_OC ? 0x08 : 0) |
1839                            (ins->rex & (REX_R|REX_X|REX_B));
1840                     ins->rex = 0;
1841                 }
1842
1843                 s = p - bytes;
1844                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1845
1846                 /*
1847                  * Make sure the address gets the right offset in case
1848                  * the line breaks in the .lst file (BR 1197827)
1849                  */
1850                 offset += s;
1851                 s = 0;
1852
1853                 switch (ea_data.bytes) {
1854                 case 0:
1855                     break;
1856                 case 1:
1857                 case 2:
1858                 case 4:
1859                 case 8:
1860                     data = opy->offset;
1861                     s += ea_data.bytes;
1862                     if (ea_data.rip) {
1863                         if (opy->segment == segment) {
1864                             data -= insn_end;
1865                             if (overflow_signed(data, ea_data.bytes))
1866                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1867                             out(offset, segment, &data, OUT_ADDRESS,
1868                                 ea_data.bytes, NO_SEG, NO_SEG);
1869                         } else {
1870                             /* overflow check in output/linker? */
1871                             out(offset, segment, &data, OUT_REL4ADR,
1872                                 insn_end - offset, opy->segment, opy->wrt);
1873                         }
1874                     } else {
1875                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1876                             signed_bits(opy->offset, ins->addr_size) !=
1877                             signed_bits(opy->offset, ea_data.bytes * 8))
1878                             warn_overflow(ERR_PASS2, ea_data.bytes);
1879
1880                         type = OUT_ADDRESS;
1881                         out(offset, segment, &data, OUT_ADDRESS,
1882                             ea_data.bytes, opy->segment, opy->wrt);
1883                     }
1884                     break;
1885                 default:
1886                     /* Impossible! */
1887                     errfunc(ERR_PANIC,
1888                             "Invalid amount of bytes (%d) for offset?!",
1889                             ea_data.bytes);
1890                     break;
1891                 }
1892                 offset += s;
1893             }
1894             break;
1895
1896         default:
1897             errfunc(ERR_PANIC, "internal instruction table corrupt"
1898                     ": instruction code \\%o (0x%02X) given", c, c);
1899             break;
1900         }
1901     }
1902 }
1903
1904 static opflags_t regflag(const operand * o)
1905 {
1906     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1907         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1908     }
1909     return nasm_reg_flags[o->basereg];
1910 }
1911
1912 static int32_t regval(const operand * o)
1913 {
1914     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1915         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1916     }
1917     return nasm_regvals[o->basereg];
1918 }
1919
1920 static int op_rexflags(const operand * o, int mask)
1921 {
1922     opflags_t flags;
1923     int val;
1924
1925     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1926         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1927     }
1928
1929     flags = nasm_reg_flags[o->basereg];
1930     val = nasm_regvals[o->basereg];
1931
1932     return rexflags(val, flags, mask);
1933 }
1934
1935 static int rexflags(int val, opflags_t flags, int mask)
1936 {
1937     int rex = 0;
1938
1939     if (val >= 8)
1940         rex |= REX_B|REX_X|REX_R;
1941     if (flags & BITS64)
1942         rex |= REX_W;
1943     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1944         rex |= REX_H;
1945     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1946         rex |= REX_P;
1947
1948     return rex & mask;
1949 }
1950
1951 static enum match_result find_match(const struct itemplate **tempp,
1952                                     insn *instruction,
1953                                     int32_t segment, int64_t offset, int bits)
1954 {
1955     const struct itemplate *temp;
1956     enum match_result m, merr;
1957     opflags_t xsizeflags[MAX_OPERANDS];
1958     bool opsizemissing = false;
1959     int i;
1960
1961     for (i = 0; i < instruction->operands; i++)
1962         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1963
1964     merr = MERR_INVALOP;
1965
1966     for (temp = nasm_instructions[instruction->opcode];
1967          temp->opcode != I_none; temp++) {
1968         m = matches(temp, instruction, bits);
1969         if (m == MOK_JUMP) {
1970             if (jmp_match(segment, offset, bits, instruction, temp->code))
1971                 m = MOK_GOOD;
1972             else
1973                 m = MERR_INVALOP;
1974         } else if (m == MERR_OPSIZEMISSING &&
1975                    (temp->flags & IF_SMASK) != IF_SX) {
1976             /*
1977              * Missing operand size and a candidate for fuzzy matching...
1978              */
1979             for (i = 0; i < temp->operands; i++) {
1980                 if ((temp->opd[i] & SAME_AS) == 0)
1981                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1982             }
1983             opsizemissing = true;
1984         }
1985         if (m > merr)
1986             merr = m;
1987         if (merr == MOK_GOOD)
1988             goto done;
1989     }
1990
1991     /* No match, but see if we can get a fuzzy operand size match... */
1992     if (!opsizemissing)
1993         goto done;
1994
1995     for (i = 0; i < instruction->operands; i++) {
1996         /*
1997          * We ignore extrinsic operand sizes on registers, so we should
1998          * never try to fuzzy-match on them.  This also resolves the case
1999          * when we have e.g. "xmmrm128" in two different positions.
2000          */
2001         if (is_class(REGISTER, instruction->oprs[i].type))
2002             continue;
2003
2004         /* This tests if xsizeflags[i] has more than one bit set */
2005         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2006             goto done;          /* No luck */
2007
2008         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2009     }
2010
2011     /* Try matching again... */
2012     for (temp = nasm_instructions[instruction->opcode];
2013          temp->opcode != I_none; temp++) {
2014         m = matches(temp, instruction, bits);
2015         if (m == MOK_JUMP) {
2016             if (jmp_match(segment, offset, bits, instruction, temp->code))
2017                 m = MOK_GOOD;
2018             else
2019                 m = MERR_INVALOP;
2020         }
2021         if (m > merr)
2022             merr = m;
2023         if (merr == MOK_GOOD)
2024             goto done;
2025     }
2026
2027 done:
2028     *tempp = temp;
2029     return merr;
2030 }
2031
/*
 * matches: check one instruction template against a parsed instruction.
 *
 *   itemp       - candidate template
 *   instruction - parsed instruction; this function only reads it
 *   bits        - current mode; selects the default size for IF_SZ
 *                 templates and which long-mode flag is tested
 *
 * Returns, in order of the checks performed:
 *   MERR_INVALOP        opcode, operand count, COLON/TO or operand flags
 *                       do not fit the template
 *   MERR_OPSIZEMISSING  the only discrepancy is a missing size on at
 *                       least one non-register operand
 *   MERR_OPSIZEMISMATCH an operand specifies a size the template's size
 *                       flags do not allow
 *   MERR_BADCPU         template's IF_PLEVEL is above the global cpu
 *   MERR_BADMODE        template carries IF_NOLONG in 64-bit mode, or
 *                       IF_LONG outside it
 *   MOK_JUMP            (code[0] & 0374) == 0370
 *   MOK_GOOD            otherwise
 */
2032 static enum match_result matches(const struct itemplate *itemp,
2033                                  insn *instruction, int bits)
2034 {
2035     int i, size[MAX_OPERANDS], asize, oprs;
2036     bool opsizemissing = false;
2037
2038     /*
2039      * Check the opcode
2040      */
2041     if (itemp->opcode != instruction->opcode)
2042         return MERR_INVALOP;
2043
2044     /*
2045      * Count the operands
2046      */
2047     if (itemp->operands != instruction->operands)
2048         return MERR_INVALOP;
2049
2050     /*
2051      * Check that no spurious colons or TOs are present
2052      */
2053     for (i = 0; i < itemp->operands; i++)
2054         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2055             return MERR_INVALOP;
2056
2057     /*
2058      * Process size flags
2059      */
         /* asize becomes the default operand size implied by the IF_S* flag */
2060     switch (itemp->flags & IF_SMASK) {
2061     case IF_SB:
2062         asize = BITS8;
2063         break;
2064     case IF_SW:
2065         asize = BITS16;
2066         break;
2067     case IF_SD:
2068         asize = BITS32;
2069         break;
2070     case IF_SQ:
2071         asize = BITS64;
2072         break;
2073     case IF_SO:
2074         asize = BITS128;
2075         break;
2076     case IF_SY:
2077         asize = BITS256;
2078         break;
2079     case IF_SZ:
2080         switch (bits) {
2081         case 16:
2082             asize = BITS16;
2083             break;
2084         case 32:
2085             asize = BITS32;
2086             break;
2087         case 64:
2088             asize = BITS64;
2089             break;
2090         default:
2091             asize = 0;
2092             break;
2093         }
2094         break;
2095     default:
2096         asize = 0;
2097         break;
2098     }
2099
2100     if (itemp->flags & IF_ARMASK) {
2101         /* S- flags only apply to a specific operand */
2102         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2103         memset(size, 0, sizeof size);
2104         size[i] = asize;
2105     } else {
2106         /* S- flags apply to all operands */
2107         for (i = 0; i < MAX_OPERANDS; i++)
2108             size[i] = asize;
2109     }
2110
2111     /*
2112      * Check that the operand flags all match up,
2113      * it's a bit tricky so lets be verbose:
2114      *
2115      * 1) Find out the size of operand. If instruction
2116      *    doesn't have one specified -- we're trying to
2117      *    guess it either from template (IF_S* flag) or
2118      *    from code bits.
2119      *
2120      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2121      *    (ie the same operand as was specified somewhere in template, and
2122      *    this referred operand index is being achieved via ~SAME_AS)
2123      *    we are to be sure that both registers (in template and instruction)
2124      *    do exactly match.
2125      *
2126      * 3) If template operand do not match the instruction OR
2127      *    template has an operand size specified AND this size differ
2128      *    from which instruction has (perhaps we got it from code bits)
2129      *    we are:
2130      *      a)  Check that only size of instruction and operand is differ
2131      *          other characteristics do match
2132      *      b)  Perhaps it's a register specified in instruction so
2133      *          for such a case we just mark that operand as "size
2134      *          missing" and this will turn on fuzzy operand size
2135      *          logic facility (handled by a caller)
2136      */
2137     for (i = 0; i < itemp->operands; i++) {
2138         opflags_t type = instruction->oprs[i].type;
2139         if (!(type & SIZE_MASK))
2140             type |= size[i];
2141
2142         if (itemp->opd[i] & SAME_AS) {
2143             int j = itemp->opd[i] & ~SAME_AS;
2144             if (type != instruction->oprs[j].type ||
2145                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2146                 return MERR_INVALOP;
2147         } else if (itemp->opd[i] & ~type ||
2148             ((itemp->opd[i] & SIZE_MASK) &&
2149              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
                 /*
                  * Reject when a non-size template bit is missing from the
                  * operand, or when the operand does carry a size (so the
                  * sizes genuinely disagree).
                  */
2150             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2151                 return MERR_INVALOP;
2152             } else if (!is_class(REGISTER, type)) {
2153                 /*
2154                  * Note: we don't honor extrinsic operand sizes for registers,
2155                  * so "missing operand size" for a register should be
2156                  * considered a wildcard match rather than an error.
2157                  */
2158                 opsizemissing = true;
2159             }
2160         }
2161     }
2162
         /* Reported only after every operand has been checked, so a later
          * MERR_INVALOP above takes precedence over a missing size. */
2163     if (opsizemissing)
2164         return MERR_OPSIZEMISSING;
2165
2166     /*
2167      * Check operand sizes
2168      */
         /*
          * With IF_SM/IF_SM2 the first template operand that has a size
          * dictates size[] for the first oprs operands (2 for IF_SM2,
          * otherwise all of the template's operands).
          */
2169     if (itemp->flags & (IF_SM | IF_SM2)) {
2170         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2171         for (i = 0; i < oprs; i++) {
2172             asize = itemp->opd[i] & SIZE_MASK;
2173             if (asize) {
                     /* reuses i; harmless because the break below leaves
                      * the outer loop immediately afterwards */
2174                 for (i = 0; i < oprs; i++)
2175                     size[i] = asize;
2176                 break;
2177             }
2178         }
2179     } else {
             /* NOTE(review): oprs is not read again after this assignment */
2180         oprs = itemp->operands;
2181     }
2182
         /*
          * Where the template operand itself has no size, the instruction
          * operand may not carry any size bit outside size[i].
          */
2183     for (i = 0; i < itemp->operands; i++) {
2184         if (!(itemp->opd[i] & SIZE_MASK) &&
2185             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2186             return MERR_OPSIZEMISMATCH;
2187     }
2188
2189     /*
2190      * Check template is okay at the set cpu level
2191      */
2192     if (((itemp->flags & IF_PLEVEL) > cpu))
2193         return MERR_BADCPU;
2194
2195     /*
2196      * Verify the appropriate long mode flag.
2197      */
2198     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2199         return MERR_BADMODE;
2200
2201     /*
2202      * Check if special handling needed for Jumps
2203      */
2204     if ((itemp->code[0] & 0374) == 0370)
2205         return MOK_JUMP;
2206
2207     return MOK_GOOD;
2208 }
2209
/*
 * process_ea: work out the ModRM/SIB encoding for one operand.
 *
 *   input    - operand to encode (register or memory reference); its type
 *              field may be modified (IP_REL cleared, MEMORY set) when an
 *              absolute address was marked RIP-relative in 64-bit mode
 *   output   - receives rip, sib_present, sib, modrm, bytes (displacement
 *              length) and size (1 + SIB + displacement bytes); REX bits
 *              are OR-ed into output->rex
 *   bits     - current mode (16/32/64)
 *   addrbits - address size in effect for the instruction
 *   rfield   - value for bits 3..5 of the ModRM byte (only the low three
 *              bits are used there)
 *   rflags   - register flags belonging to rfield, used for REX bits
 *
 * Returns output on success, NULL when the operand is not a valid
 * effective address.  On a NULL return output may be partly written.
 */
2210 static ea *process_ea(operand * input, ea * output, int bits,
2211                       int addrbits, int rfield, opflags_t rflags)
2212 {
2213     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2214
2215     output->rip = false;
2216
         /* NOTE: rex is only ever OR-ed into in this function, never
          * cleared, so its starting value comes from the caller */
2217     /* REX flags for the rfield operand */
2218     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2219
2220     if (is_class(REGISTER, input->type)) {  /* register direct */
2221         int i;
2222         opflags_t f;
2223
2224         if (input->basereg < EXPR_REG_START /* Verify as Register */
2225             || input->basereg >= REG_ENUM_LIMIT)
2226             return NULL;
2227         f = regflag(input);
2228         i = nasm_regvals[input->basereg];
2229
2230         if (REG_EA & ~f)
2231             return NULL;        /* Invalid EA register */
2232
2233         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2234
2235         output->sib_present = false;             /* no SIB necessary */
2236         output->bytes = 0;  /* no offset necessary either */
             /* 0xC0 sets both mod bits; the rm field holds the register number */
2237         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2238     } else {                    /* it's a memory reference */
2239         if (input->basereg == -1
2240             && (input->indexreg == -1 || input->scale == 0)) {
2241             /* it's a pure offset */
2242
2243             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2244                 input->segment == NO_SEG) {
2245                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2246                 input->type &= ~IP_REL;
2247                 input->type |= MEMORY;
2248             }
2249
2250             if (input->eaflags & EAF_BYTEOFFS ||
2251                 (input->eaflags & EAF_WORDOFFS &&
2252                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2253                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2254             }
2255
                 /*
                  * 64-bit mode without the full IP_REL flag: ModRM rm=4
                  * plus a SIB byte (scale 0, index 4, base 5) and a 4-byte
                  * displacement.  Otherwise a bare ModRM displacement form
                  * (rm=5 with 4 bytes, or rm=6 with 2 bytes for 16-bit
                  * addressing), marked RIP-relative when bits == 64.
                  */
2256             if (bits == 64 && (~input->type & IP_REL)) {
2257               int scale, index, base;
2258               output->sib_present = true;
2259               scale = 0;
2260               index = 4;
2261               base = 5;
2262               output->sib = (scale << 6) | (index << 3) | base;
2263               output->bytes = 4;
2264               output->modrm = 4 | ((rfield & 7) << 3);
2265               output->rip = false;
2266             } else {
2267               output->sib_present = false;
2268               output->bytes = (addrbits != 16 ? 4 : 2);
2269               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2270               output->rip = bits == 64;
2271             }
2272         } else {                /* it's an indirection */
2273             int i = input->indexreg, b = input->basereg, s = input->scale;
2274             int32_t seg = input->segment;
2275             int hb = input->hintbase, ht = input->hinttype;
2276             int t, it, bt;              /* register numbers */
2277             opflags_t x, ix, bx;        /* register flags */
2278
2279             if (s == 0)
2280                 i = -1;         /* make this easy, at least */
2281
                 /* it/ix and bt/bx: number and flags of the index and base
                  * registers, or -1/0 when that register is absent */
2282             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2283                 it = nasm_regvals[i];
2284                 ix = nasm_reg_flags[i];
2285             } else {
2286                 it = -1;
2287                 ix = 0;
2288             }
2289
2290             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2291                 bt = nasm_regvals[b];
2292                 bx = nasm_reg_flags[b];
2293             } else {
2294                 bt = -1;
2295                 bx = 0;
2296             }
2297
2298             /* check for a 32/64-bit memory reference... */
2299             if ((ix|bx) & (BITS32|BITS64)) {
2300                 /* it must be a 32/64-bit memory reference. Firstly we have
2301                  * to check that all registers involved are type E/Rxx. */
                     /* sok: address sizes still permitted by the registers
                      * seen so far; o: the offset narrowed to 32 bits, which
                      * is the value the mod selection below tests */
2302                 int32_t sok = BITS32|BITS64, o = input->offset;
2303
2304                 if (it != -1) {
2305                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2306                         sok &= ix;
2307                     else
2308                         return NULL;
2309                 }
2310
2311                 if (bt != -1) {
2312                     if (REG_GPR & ~bx)
2313                         return NULL; /* Invalid register */
2314                     if (~sok & bx & SIZE_MASK)
2315                         return NULL; /* Invalid size */
2316                     sok &= bx;
2317                 }
2318
2319                 /* While we're here, ensure the user didn't specify
2320                    WORD or QWORD. */
2321                 if (input->disp_size == 16 || input->disp_size == 64)
2322                     return NULL;
2323
2324                 if (addrbits == 16 ||
2325                     (addrbits == 32 && !(sok & BITS32)) ||
2326                     (addrbits == 64 && !(sok & BITS64)))
2327                     return NULL;
2328
                     /* The following steps rewrite bt/it/s in sequence;
                      * each one sees the result of the ones before it. */
2329                 /* now reorganize base/index */
2330                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2331                     ((hb == b && ht == EAH_NOTBASE)
2332                      || (hb == i && ht == EAH_MAKEBASE))) {
2333                     /* swap if hints say so */
2334                     t = bt, bt = it, it = t;
2335                     x = bx, bx = ix, ix = x;
2336                 }
2337                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2338                     bt = -1, bx = 0, s++;
2339                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2340                     /* make single reg base, unless hint */
2341                     bt = it, bx = ix, it = -1, ix = 0;
2342                 }
2343                 if (((s == 2 && it != REG_NUM_ESP
2344                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2345                      || s == 5 || s == 9) && bt == -1)
2346                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2347                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2348                     && (input->eaflags & EAF_TIMESTWO))
2349                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2350                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2351                 if (s == 1 && it == REG_NUM_ESP) {
2352                     /* swap ESP into base if scale is 1 */
2353                     t = it, it = bt, bt = t;
2354                     x = ix, ix = bx, bx = x;
2355                 }
2356                 if (it == REG_NUM_ESP
2357                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2358                     return NULL;        /* wrong, for various reasons */
2359
2360                 output->rex |= rexflags(it, ix, REX_X);
2361                 output->rex |= rexflags(bt, bx, REX_B);
2362
2363                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2364                     /* no SIB needed */
2365                     int mod, rm;
2366
2367                     if (bt == -1) {
2368                         rm = 5;
2369                         mod = 0;
2370                     } else {
2371                         rm = (bt & 7);
                             /*
                              * mod 0: no displacement; mod 1: byte
                              * displacement; mod 2: full displacement.
                              * A forward reference or a segment-relative
                              * offset never gets the short forms unless
                              * EAF_BYTEOFFS forces mod 1.
                              */
2372                         if (rm != REG_NUM_EBP && o == 0 &&
2373                                 seg == NO_SEG && !forw_ref &&
2374                                 !(input->eaflags &
2375                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2376                             mod = 0;
2377                         else if (input->eaflags & EAF_BYTEOFFS ||
2378                                  (o >= -128 && o <= 127 && seg == NO_SEG
2379                                   && !forw_ref
2380                                   && !(input->eaflags & EAF_WORDOFFS)))
2381                             mod = 1;
2382                         else
2383                             mod = 2;
2384                     }
2385
2386                     output->sib_present = false;
                         /* no base or mod 2 -> 4 displacement bytes;
                          * otherwise mod (0 or 1) is the byte count */
2387                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2388                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2389                 } else {
2390                     /* we need a SIB */
2391                     int mod, scale, index, base;
2392
2393                     if (it == -1)
2394                         index = 4, s = 1;
2395                     else
2396                         index = (it & 7);
2397
                         /* translate the scale factor into the 2-bit SIB field */
2398                     switch (s) {
2399                     case 1:
2400                         scale = 0;
2401                         break;
2402                     case 2:
2403                         scale = 1;
2404                         break;
2405                     case 4:
2406                         scale = 2;
2407                         break;
2408                     case 8:
2409                         scale = 3;
2410                         break;
2411                     default:   /* then what the smeg is it? */
2412                         return NULL;    /* panic */
2413                     }
2414
2415                     if (bt == -1) {
2416                         base = 5;
2417                         mod = 0;
2418                     } else {
2419                         base = (bt & 7);
                             /* same mod selection as the no-SIB case above */
2420                         if (base != REG_NUM_EBP && o == 0 &&
2421                                     seg == NO_SEG && !forw_ref &&
2422                                     !(input->eaflags &
2423                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2424                             mod = 0;
2425                         else if (input->eaflags & EAF_BYTEOFFS ||
2426                                  (o >= -128 && o <= 127 && seg == NO_SEG
2427                                   && !forw_ref
2428                                   && !(input->eaflags & EAF_WORDOFFS)))
2429                             mod = 1;
2430                         else
2431                             mod = 2;
2432                     }
2433
2434                     output->sib_present = true;
2435                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2436                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2437                     output->sib = (scale << 6) | (index << 3) | base;
2438                 }
2439             } else {            /* it's 16-bit */
2440                 int mod, rm;
                     /* the offset narrowed to 16 bits; the mod selection
                      * below tests this narrowed value */
2441                 int16_t o = input->offset;
2442
2443                 /* check for 64-bit long mode */
2444                 if (addrbits == 64)
2445                     return NULL;
2446
2447                 /* check all registers are BX, BP, SI or DI */
2448                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2449                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2450                                        && i != R_SI && i != R_DI))
2451                     return NULL;
2452
2453                 /* ensure the user didn't specify DWORD/QWORD */
2454                 if (input->disp_size == 32 || input->disp_size == 64)
2455                     return NULL;
2456
2457                 if (s != 1 && i != -1)
2458                     return NULL;        /* no can do, in 16-bit EA */
2459                 if (b == -1 && i != -1) {
2460                     int tmp = b;
2461                     b = i;
2462                     i = tmp;
2463                 }               /* swap */
2464                 if ((b == R_SI || b == R_DI) && i != -1) {
2465                     int tmp = b;
2466                     b = i;
2467                     i = tmp;
2468                 }
2469                 /* have BX/BP as base, SI/DI index */
2470                 if (b == i)
2471                     return NULL;        /* shouldn't ever happen, in theory */
2472                 if (i != -1 && b != -1 &&
2473                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2474                     return NULL;        /* invalid combinations */
2475                 if (b == -1)    /* pure offset: handled above */
2476                     return NULL;        /* so if it gets to here, panic! */
2477
2478                 rm = -1;
                     /* i * 256 + b packs the index/base pair into a single
                      * switch key; with no index the base alone picks rm */
2479                 if (i != -1)
2480                     switch (i * 256 + b) {
2481                     case R_SI * 256 + R_BX:
2482                         rm = 0;
2483                         break;
2484                     case R_DI * 256 + R_BX:
2485                         rm = 1;
2486                         break;
2487                     case R_SI * 256 + R_BP:
2488                         rm = 2;
2489                         break;
2490                     case R_DI * 256 + R_BP:
2491                         rm = 3;
2492                         break;
2493                 } else
2494                     switch (b) {
2495                     case R_SI:
2496                         rm = 4;
2497                         break;
2498                     case R_DI:
2499                         rm = 5;
2500                         break;
2501                     case R_BP:
2502                         rm = 6;
2503                         break;
2504                     case R_BX:
2505                         rm = 7;
2506                         break;
2507                     }
2508                 if (rm == -1)   /* can't happen, in theory */
2509                     return NULL;        /* so panic if it does */
2510
                     /* rm == 6 (the BP-only form) is excluded from mod 0 */
2511                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2512                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2513                     mod = 0;
2514                 else if (input->eaflags & EAF_BYTEOFFS ||
2515                          (o >= -128 && o <= 127 && seg == NO_SEG
2516                           && !forw_ref
2517                           && !(input->eaflags & EAF_WORDOFFS)))
2518                     mod = 1;
2519                 else
2520                     mod = 2;
2521
2522                 output->sib_present = false;    /* no SIB - it's 16-bit */
2523                 output->bytes = mod;    /* bytes of offset needed */
2524                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2525             }
2526         }
2527     }
2528
         /* total encoded length: ModRM + optional SIB + displacement */
2529     output->size = 1 + output->sib_present + output->bytes;
2530     return output;
2531 }
2532
2533 static void add_asp(insn *ins, int addrbits)
2534 {
2535     int j, valid;
2536     int defdisp;
2537
2538     valid = (addrbits == 64) ? 64|32 : 32|16;
2539
2540     switch (ins->prefixes[PPS_ASIZE]) {
2541     case P_A16:
2542         valid &= 16;
2543         break;
2544     case P_A32:
2545         valid &= 32;
2546         break;
2547     case P_A64:
2548         valid &= 64;
2549         break;
2550     case P_ASP:
2551         valid &= (addrbits == 32) ? 16 : 32;
2552         break;
2553     default:
2554         break;
2555     }
2556
2557     for (j = 0; j < ins->operands; j++) {
2558         if (is_class(MEMORY, ins->oprs[j].type)) {
2559             opflags_t i, b;
2560
2561             /* Verify as Register */
2562             if (ins->oprs[j].indexreg < EXPR_REG_START
2563                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2564                 i = 0;
2565             else
2566                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2567
2568             /* Verify as Register */
2569             if (ins->oprs[j].basereg < EXPR_REG_START
2570                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2571                 b = 0;
2572             else
2573                 b = nasm_reg_flags[ins->oprs[j].basereg];
2574
2575             if (ins->oprs[j].scale == 0)
2576                 i = 0;
2577
2578             if (!i && !b) {
2579                 int ds = ins->oprs[j].disp_size;
2580                 if ((addrbits != 64 && ds > 8) ||
2581                     (addrbits == 64 && ds == 16))
2582                     valid &= ds;
2583             } else {
2584                 if (!(REG16 & ~b))
2585                     valid &= 16;
2586                 if (!(REG32 & ~b))
2587                     valid &= 32;
2588                 if (!(REG64 & ~b))
2589                     valid &= 64;
2590
2591                 if (!(REG16 & ~i))
2592                     valid &= 16;
2593                 if (!(REG32 & ~i))
2594                     valid &= 32;
2595                 if (!(REG64 & ~i))
2596                     valid &= 64;
2597             }
2598         }
2599     }
2600
2601     if (valid & addrbits) {
2602         ins->addr_size = addrbits;
2603     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2604         /* Add an address size prefix */
2605         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2606         ins->prefixes[PPS_ASIZE] = pref;
2607         ins->addr_size = (addrbits == 32) ? 16 : 32;
2608     } else {
2609         /* Impossible... */
2610         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2611         ins->addr_size = addrbits; /* Error recovery */
2612     }
2613
2614     defdisp = ins->addr_size == 16 ? 16 : 32;
2615
2616     for (j = 0; j < ins->operands; j++) {
2617         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2618             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2619             != ins->addr_size) {
2620             /* mem_offs sizes must match the address size; if not,
2621                strip the MEM_OFFS bit and match only EA instructions */
2622             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2623         }
2624     }
2625 }