assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \160..\163    - this instruction uses DREX rather than REX, with the
  70  *                 OC0 field set to 0, and the dest field taken from
  71  *                 operand 0..3.
  72  * \164..\167    - this instruction uses DREX rather than REX, with the
  73  *                 OC0 field set to 1, and the dest field taken from
  74  *                 operand 0..3.
  75  * \171          - placement of DREX suffix in the absence of an EA
  76  * \172\ab       - the register number from operand a in bits 7..4, with
  77  *                 the 4-bit immediate from operand b in bits 3..0.
  78  * \173\xab      - the register number from operand a in bits 7..4, with
  79  *                 the value b in bits 3..0.
  80  * \174\a        - the register number from operand a in bits 7..4, and
  81  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  82  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  83  *                 field equal to digit b.
  84  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  85  *                 is not equal to the truncated and sign-extended 32-bit
  86  *                 operand; used for 32-bit immediates in 64-bit mode.
  87  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  88  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  89  *                 V field taken from operand 0..3.
  90  * \270          - this instruction uses VEX/XOP rather than REX, with the
  91  *                 V field set to 1111b.
  92  *
  93  * VEX/XOP prefixes are followed by the sequence:
  94  * \tmm\wlp        where mm is the M field; and wlp is:
  95  *                 00 0ww lpp
  96  *                 [w0] ww = 0 for W = 0
  97  *                 [w1] ww = 1 for W = 1
  98  *                 [wx] ww = 2 for W don't care (always assembled as 0)
  99  *                 [ww] ww = 3 for W used as REX.W
 100  *
 101  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 102  *
 103  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 104  *                 which is to be extended to the operand size.
 105  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 106  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 107  * \312          - (disassembler only) invalid with non-default address size.
 108  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 109  * \314          - (disassembler only) invalid with REX.B
 110  * \315          - (disassembler only) invalid with REX.X
 111  * \316          - (disassembler only) invalid with REX.R
 112  * \317          - (disassembler only) invalid with REX.W
 113  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 114  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 115  * \322          - indicates that this instruction is only valid when the
 116  *                 operand size is the default (instruction to disassembler,
 117  *                 generates no code in the assembler)
 118  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 119  * \324          - indicates 64-bit operand size requiring REX prefix.
 120  * \325          - instruction which always uses spl/bpl/sil/dil
 121  * \330          - a literal byte follows in the code stream, to be added
 122  *                 to the condition code value of the instruction.
 123  * \331          - instruction not valid with REP prefix.  Hint for
 124  *                 disassembler only; for SSE instructions.
 125  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 126  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 127  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 128  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  129  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  130  * \337          - force a REPNE prefix (0xF2) even if not specified.
 131  *                 \336-\337 are still listed as prefixes in the disassembler.
 132  * \340          - reserve <operand 0> bytes of uninitialized storage.
 133  *                 Operand 0 had better be a segmentless constant.
 134  * \341          - this instruction needs a WAIT "prefix"
 135  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 136  *                 (POP is never used for CS) depending on operand 0
 137  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 138  *                 on operand 0
 139  * \360          - no SSE prefix (== \364\331)
 140  * \361          - 66 SSE prefix (== \366\331)
 141  * \362          - F2 SSE prefix (== \364\332)
 142  * \363          - F3 SSE prefix (== \364\333)
 143  * \364          - operand-size prefix (0x66) not permitted
 144  * \365          - address-size prefix (0x67) not permitted
 145  * \366          - operand-size prefix (0x66) used as opcode extension
 146  * \367          - address-size prefix (0x67) used as opcode extension
 147  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 148  *                 370 is used for Jcc, 371 is used for JMP.
 149  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 150  *                 used for conditional jump over longer jump
 151  */
 152
 153 #include "compiler.h"
 154
 155 #include <stdio.h>
 156 #include <string.h>
 157 #include <inttypes.h>
 158
 159 #include "nasm.h"
 160 #include "nasmlib.h"
 161 #include "assemble.h"
 162 #include "insns.h"
 163 #include "tables.h"
 164
 165 enum match_result {
 166     /*
 167      * Matching errors.  These should be sorted so that more specific
 168      * errors come later in the sequence.
 169      */
 170     MERR_INVALOP,
 171     MERR_OPSIZEMISSING,
 172     MERR_OPSIZEMISMATCH,
 173     MERR_BADCPU,
 174     MERR_BADMODE,
 175     /*
 176      * Matching success; the conditional ones first
 177      */
 178     MOK_JUMP,                   /* Matching OK but needs jmp_match() */
 179     MOK_GOOD                    /* Matching unconditionally OK */
 180 };
 181
 182 typedef struct {
 183     int sib_present;                 /* is a SIB byte necessary? */
 184     int bytes;                       /* # of bytes of offset needed */
 185     int size;                        /* lazy - this is sib+bytes+1 */
 186     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 187 } ea;
 188
 189 static uint32_t cpu;            /* cpu level received from nasm.c */
 190 static efunc errfunc;
 191 static struct ofmt *outfmt;
 192 static ListGen *list;
 193
 194 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
 195 static void gencode(int32_t segment, int64_t offset, int bits,
 196                     insn * ins, const struct itemplate *temp,
 197                     int64_t insn_end);
 198 static enum match_result matches(const struct itemplate *, insn *, int bits);
 199 static int32_t regflag(const operand *);
 200 static int32_t regval(const operand *);
 201 static int rexflags(int, int32_t, int);
 202 static int op_rexflags(const operand *, int);
 203 static ea *process_ea(operand *, ea *, int, int, int, int32_t);
 204 static void add_asp(insn *, int);
 205
 206 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 207 {
 208     return ins->prefixes[pos] == prefix;
 209 }
 210
 211 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 212 {
 213     if (ins->prefixes[pos])
 214         errfunc(ERR_NONFATAL, "invalid %s prefix",
 215                 prefix_name(ins->prefixes[pos]));
 216 }
 217
 218 static const char *size_name(int size)
 219 {
 220     switch (size) {
 221     case 1:
 222         return "byte";
 223     case 2:
 224         return "word";
 225     case 4:
 226         return "dword";
 227     case 8:
 228         return "qword";
 229     case 10:
 230         return "tword";
 231     case 16:
 232         return "oword";
 233     case 32:
 234         return "yword";
 235     default:
 236         return "???";
 237     }
 238 }
 239
 240 static void warn_overflow(int size, const struct operand *o)
 241 {
 242     if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
 243         int64_t lim = ((int64_t)1 << (size*8))-1;
 244         int64_t data = o->offset;
 245
 246         if (data < ~lim || data > lim)
 247             errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
 248                     "%s data exceeds bounds", size_name(size));
 249     }
 250 }
 251 /*
 252  * This routine wrappers the real output format's output routine,
 253  * in order to pass a copy of the data off to the listing file
 254  * generator at the same time.
 255  */
 256 static void out(int64_t offset, int32_t segto, const void *data,
 257                 enum out_type type, uint64_t size,
 258                 int32_t segment, int32_t wrt)
 259 {
 260     static int32_t lineno = 0;     /* static!!! */
 261     static char *lnfname = NULL;
 262     uint8_t p[8];
 263
 264     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 265         /*
 266          * This is a non-relocated address, and we're going to
 267          * convert it into RAWDATA format.
 268          */
 269         uint8_t *q = p;
 270
 271         if (size > 8) {
 272             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 273             return;
 274         }
 275
 276         WRITEADDR(q, *(int64_t *)data, size);
 277         data = p;
 278         type = OUT_RAWDATA;
 279     }
 280
 281     list->output(offset, data, type, size);
 282
 283     /*
 284      * this call to src_get determines when we call the
 285      * debug-format-specific "linenum" function
 286      * it updates lineno and lnfname to the current values
 287      * returning 0 if "same as last time", -2 if lnfname
 288      * changed, and the amount by which lineno changed,
 289      * if it did. thus, these variables must be static
 290      */
 291
 292     if (src_get(&lineno, &lnfname)) {
 293         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 294     }
 295
 296     outfmt->output(segto, data, type, size, segment, wrt);
 297 }
 298
 299 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 300                      insn * ins, const uint8_t *code)
 301 {
 302     int64_t isize;
 303     uint8_t c = code[0];
 304
 305     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 306         return false;
 307     if (!optimizing)
 308         return false;
 309     if (optimizing < 0 && c == 0371)
 310         return false;
 311
 312     isize = calcsize(segment, offset, bits, ins, code);
 313
 314     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 315         /* Be optimistic in pass 1 */
 316         return true;
 317
 318     if (ins->oprs[0].segment != segment)
 319         return false;
 320
 321     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 322     return (isize >= -128 && isize <= 127); /* is it byte size? */
 323 }
 324
 325 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 326                  insn * instruction, struct ofmt *output, efunc error,
 327                  ListGen * listgen)
 328 {
 329     const struct itemplate *temp;
 330     int j;
 331     enum match_result size_prob;
 332     int64_t insn_end;
 333     int32_t itimes;
 334     int64_t start = offset;
 335     int64_t wsize = 0;             /* size for DB etc. */
 336
 337     errfunc = error;            /* to pass to other functions */
 338     cpu = cp;
 339     outfmt = output;            /* likewise */
 340     list = listgen;             /* and again */
 341
 342     switch (instruction->opcode) {
 343     case -1:
 344         return 0;
 345     case I_DB:
 346         wsize = 1;
 347         break;
 348     case I_DW:
 349         wsize = 2;
 350         break;
 351     case I_DD:
 352         wsize = 4;
 353         break;
 354     case I_DQ:
 355         wsize = 8;
 356         break;
 357     case I_DT:
 358         wsize = 10;
 359         break;
 360     case I_DO:
 361         wsize = 16;
 362         break;
 363     case I_DY:
 364         wsize = 32;
 365         break;
 366     default:
 367         break;
 368     }
 369
 370     if (wsize) {
 371         extop *e;
 372         int32_t t = instruction->times;
 373         if (t < 0)
 374             errfunc(ERR_PANIC,
 375                     "instruction->times < 0 (%ld) in assemble()", t);
 376
 377         while (t--) {           /* repeat TIMES times */
 378             for (e = instruction->eops; e; e = e->next) {
 379                 if (e->type == EOT_DB_NUMBER) {
 380                     if (wsize == 1) {
 381                         if (e->segment != NO_SEG)
 382                             errfunc(ERR_NONFATAL,
 383                                     "one-byte relocation attempted");
 384                         else {
 385                             uint8_t out_byte = e->offset;
 386                             out(offset, segment, &out_byte,
 387                                 OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 388                         }
 389                     } else if (wsize > 8) {
 390                         errfunc(ERR_NONFATAL,
 391                                 "integer supplied to a DT, DO or DY"
 392                                 " instruction");
 393                     } else
 394                         out(offset, segment, &e->offset,
 395                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 396                     offset += wsize;
 397                 } else if (e->type == EOT_DB_STRING ||
 398                            e->type == EOT_DB_STRING_FREE) {
 399                     int align;
 400
 401                     out(offset, segment, e->stringval,
 402                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 403                     align = e->stringlen % wsize;
 404
 405                     if (align) {
 406                         align = wsize - align;
 407                         out(offset, segment, zero_buffer,
 408                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 409                     }
 410                     offset += e->stringlen + align;
 411                 }
 412             }
 413             if (t > 0 && t == instruction->times - 1) {
 414                 /*
 415                  * Dummy call to list->output to give the offset to the
 416                  * listing module.
 417                  */
 418                 list->output(offset, NULL, OUT_RAWDATA, 0);
 419                 list->uplevel(LIST_TIMES);
 420             }
 421         }
 422         if (instruction->times > 1)
 423             list->downlevel(LIST_TIMES);
 424         return offset - start;
 425     }
 426
 427     if (instruction->opcode == I_INCBIN) {
 428         const char *fname = instruction->eops->stringval;
 429         FILE *fp;
 430
 431         fp = fopen(fname, "rb");
 432         if (!fp) {
 433             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 434                   fname);
 435         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 436             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 437                   fname);
 438         } else {
 439             static char buf[4096];
 440             size_t t = instruction->times;
 441             size_t base = 0;
 442             size_t len;
 443
 444             len = ftell(fp);
 445             if (instruction->eops->next) {
 446                 base = instruction->eops->next->offset;
 447                 len -= base;
 448                 if (instruction->eops->next->next &&
 449                     len > (size_t)instruction->eops->next->next->offset)
 450                     len = (size_t)instruction->eops->next->next->offset;
 451             }
 452             /*
 453              * Dummy call to list->output to give the offset to the
 454              * listing module.
 455              */
 456             list->output(offset, NULL, OUT_RAWDATA, 0);
 457             list->uplevel(LIST_INCBIN);
 458             while (t--) {
 459                 size_t l;
 460
 461                 fseek(fp, base, SEEK_SET);
 462                 l = len;
 463                 while (l > 0) {
 464                     int32_t m;
 465                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 466                     if (!m) {
 467                         /*
 468                          * This shouldn't happen unless the file
 469                          * actually changes while we are reading
 470                          * it.
 471                          */
 472                         error(ERR_NONFATAL,
 473                               "`incbin': unexpected EOF while"
 474                               " reading file `%s'", fname);
 475                         t = 0;  /* Try to exit cleanly */
 476                         break;
 477                     }
 478                     out(offset, segment, buf, OUT_RAWDATA, m,
 479                         NO_SEG, NO_SEG);
 480                     l -= m;
 481                 }
 482             }
 483             list->downlevel(LIST_INCBIN);
 484             if (instruction->times > 1) {
 485                 /*
 486                  * Dummy call to list->output to give the offset to the
 487                  * listing module.
 488                  */
 489                 list->output(offset, NULL, OUT_RAWDATA, 0);
 490                 list->uplevel(LIST_TIMES);
 491                 list->downlevel(LIST_TIMES);
 492             }
 493             fclose(fp);
 494             return instruction->times * len;
 495         }
 496         return 0;               /* if we're here, there's an error */
 497     }
 498
 499     /* Check to see if we need an address-size prefix */
 500     add_asp(instruction, bits);
 501
 502     size_prob = MERR_INVALOP;
 503
 504     for (temp = nasm_instructions[instruction->opcode];
 505          temp->opcode != -1; temp++){
 506         enum match_result m = matches(temp, instruction, bits);
 507         if (m == MOK_GOOD ||
 508             (m == MOK_JUMP && jmp_match(segment, offset, bits,
 509                                         instruction, temp->code))) {
 510             /* Matches! */
 511             int64_t insn_size = calcsize(segment, offset, bits,
 512                                          instruction, temp->code);
 513             itimes = instruction->times;
 514             if (insn_size < 0)  /* shouldn't be, on pass two */
 515                 error(ERR_PANIC, "errors made it through from pass one");
 516             else
 517                 while (itimes--) {
 518                     for (j = 0; j < MAXPREFIX; j++) {
 519                         uint8_t c = 0;
 520                         switch (instruction->prefixes[j]) {
 521                         case P_WAIT:
 522                             c = 0x9B;
 523                             break;
 524                         case P_LOCK:
 525                             c = 0xF0;
 526                             break;
 527                         case P_REPNE:
 528                         case P_REPNZ:
 529                             c = 0xF2;
 530                             break;
 531                         case P_REPE:
 532                         case P_REPZ:
 533                         case P_REP:
 534                             c = 0xF3;
 535                             break;
 536                         case R_CS:
 537                             if (bits == 64) {
 538                                 error(ERR_WARNING | ERR_PASS2,
 539                                       "cs segment base generated, but will be ignored in 64-bit mode");
 540                             }
 541                             c = 0x2E;
 542                             break;
 543                         case R_DS:
 544                             if (bits == 64) {
 545                                 error(ERR_WARNING | ERR_PASS2,
 546                                       "ds segment base generated, but will be ignored in 64-bit mode");
 547                             }
 548                             c = 0x3E;
 549                             break;
 550                         case R_ES:
 551                            if (bits == 64) {
 552                                 error(ERR_WARNING | ERR_PASS2,
 553                                       "es segment base generated, but will be ignored in 64-bit mode");
 554                            }
 555                             c = 0x26;
 556                             break;
 557                         case R_FS:
 558                             c = 0x64;
 559                             break;
 560                         case R_GS:
 561                             c = 0x65;
 562                             break;
 563                         case R_SS:
 564                             if (bits == 64) {
 565                                 error(ERR_WARNING | ERR_PASS2,
 566                                       "ss segment base generated, but will be ignored in 64-bit mode");
 567                             }
 568                             c = 0x36;
 569                             break;
 570                         case R_SEGR6:
 571                         case R_SEGR7:
 572                             error(ERR_NONFATAL,
 573                                   "segr6 and segr7 cannot be used as prefixes");
 574                             break;
 575                         case P_A16:
 576                             if (bits == 64) {
 577                                 error(ERR_NONFATAL,
 578                                       "16-bit addressing is not supported "
 579                                       "in 64-bit mode");
 580                             } else if (bits != 16)
 581                                 c = 0x67;
 582                             break;
 583                         case P_A32:
 584                             if (bits != 32)
 585                                 c = 0x67;
 586                             break;
 587                         case P_A64:
 588                             if (bits != 64) {
 589                                 error(ERR_NONFATAL,
 590                                       "64-bit addressing is only supported "
 591                                       "in 64-bit mode");
 592                             }
 593                             break;
 594                         case P_ASP:
 595                             c = 0x67;
 596                             break;
 597                         case P_O16:
 598                             if (bits != 16)
 599                                 c = 0x66;
 600                             break;
 601                         case P_O32:
 602                             if (bits == 16)
 603                                 c = 0x66;
 604                             break;
 605                         case P_O64:
 606                             /* REX.W */
 607                             break;
 608                         case P_OSP:
 609                             c = 0x66;
 610                             break;
 611                         case P_none:
 612                             break;
 613                         default:
 614                             error(ERR_PANIC, "invalid instruction prefix");
 615                         }
 616                         if (c != 0) {
 617                             out(offset, segment, &c, OUT_RAWDATA, 1,
 618                                 NO_SEG, NO_SEG);
 619                             offset++;
 620                         }
 621                     }
 622                     insn_end = offset + insn_size;
 623                     gencode(segment, offset, bits, instruction,
 624                             temp, insn_end);
 625                     offset += insn_size;
 626                     if (itimes > 0 && itimes == instruction->times - 1) {
 627                         /*
 628                          * Dummy call to list->output to give the offset to the
 629                          * listing module.
 630                          */
 631                         list->output(offset, NULL, OUT_RAWDATA, 0);
 632                         list->uplevel(LIST_TIMES);
 633                     }
 634                 }
 635             if (instruction->times > 1)
 636                 list->downlevel(LIST_TIMES);
 637             return offset - start;
 638         } else if (m > 0 && m > size_prob) {
 639             size_prob = m;
 640         }
 641     }
 642
 643     if (temp->opcode == -1) {   /* didn't match any instruction */
 644         switch (size_prob) {
 645         case MERR_OPSIZEMISSING:
 646             error(ERR_NONFATAL, "operation size not specified");
 647             break;
 648         case MERR_OPSIZEMISMATCH:
 649             error(ERR_NONFATAL, "mismatch in operand sizes");
 650             break;
 651         case MERR_BADCPU:
 652             error(ERR_NONFATAL, "no instruction for this cpu level");
 653             break;
 654         case MERR_BADMODE:
 655             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 656                   bits);
 657             break;
 658         default:
 659             error(ERR_NONFATAL,
 660                   "invalid combination of opcode and operands");
 661             break;
 662         }
 663     }
 664     return 0;
 665 }
 666
 667 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 668                   insn * instruction, efunc error)
 669 {
 670     const struct itemplate *temp;
 671
 672     errfunc = error;            /* to pass to other functions */
 673     cpu = cp;
 674
 675     if (instruction->opcode == -1)
 676         return 0;
 677
 678     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 679         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 680         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 681         instruction->opcode == I_DY) {
 682         extop *e;
 683         int32_t isize, osize, wsize = 0;   /* placate gcc */
 684
 685         isize = 0;
 686         switch (instruction->opcode) {
 687         case I_DB:
 688             wsize = 1;
 689             break;
 690         case I_DW:
 691             wsize = 2;
 692             break;
 693         case I_DD:
 694             wsize = 4;
 695             break;
 696         case I_DQ:
 697             wsize = 8;
 698             break;
 699         case I_DT:
 700             wsize = 10;
 701             break;
 702         case I_DO:
 703             wsize = 16;
 704             break;
 705         case I_DY:
 706             wsize = 32;
 707             break;
 708         default:
 709             break;
 710         }
 711
 712         for (e = instruction->eops; e; e = e->next) {
 713             int32_t align;
 714
 715             osize = 0;
 716             if (e->type == EOT_DB_NUMBER)
 717                 osize = 1;
 718             else if (e->type == EOT_DB_STRING ||
 719                      e->type == EOT_DB_STRING_FREE)
 720                 osize = e->stringlen;
 721
 722             align = (-osize) % wsize;
 723             if (align < 0)
 724                 align += wsize;
 725             isize += osize + align;
 726         }
 727         return isize * instruction->times;
 728     }
 729
 730     if (instruction->opcode == I_INCBIN) {
 731         const char *fname = instruction->eops->stringval;
 732         FILE *fp;
 733         size_t len;
 734
 735         fp = fopen(fname, "rb");
 736         if (!fp)
 737             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 738                   fname);
 739         else if (fseek(fp, 0L, SEEK_END) < 0)
 740             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 741                   fname);
 742         else {
 743             len = ftell(fp);
 744             fclose(fp);
 745             if (instruction->eops->next) {
 746                 len -= instruction->eops->next->offset;
 747                 if (instruction->eops->next->next &&
 748                     len > (size_t)instruction->eops->next->next->offset) {
 749                     len = (size_t)instruction->eops->next->next->offset;
 750                 }
 751             }
 752             return instruction->times * len;
 753         }
 754         return 0;               /* if we're here, there's an error */
 755     }
 756
 757     /* Check to see if we need an address-size prefix */
 758     add_asp(instruction, bits);
 759
 760     for (temp = nasm_instructions[instruction->opcode];
 761          temp->opcode != -1; temp++) {
 762         enum match_result m = matches(temp, instruction, bits);
 763         if (m == MOK_GOOD ||
 764             (m == MOK_JUMP && jmp_match(segment, offset, bits,
 765                                         instruction, temp->code))) {
 766             /* we've matched an instruction. */
 767             int64_t isize;
 768             const uint8_t *codes = temp->code;
 769             int j;
 770
 771             isize = calcsize(segment, offset, bits, instruction, codes);
 772             if (isize < 0)
 773                 return -1;
 774             for (j = 0; j < MAXPREFIX; j++) {
 775                 switch (instruction->prefixes[j]) {
 776                 case P_A16:
 777                     if (bits != 16)
 778                         isize++;
 779                     break;
 780                 case P_A32:
 781                     if (bits != 32)
 782                         isize++;
 783                     break;
 784                 case P_O16:
 785                     if (bits != 16)
 786                         isize++;
 787                     break;
 788                 case P_O32:
 789                     if (bits == 16)
 790                         isize++;
 791                     break;
 792                 case P_A64:
 793                 case P_O64:
 794                 case P_none:
 795                     break;
 796                 default:
 797                     isize++;
 798                     break;
 799                 }
 800             }
 801             return isize * instruction->times;
 802         }
 803     }
 804     return -1;                  /* didn't match any instruction */
 805 }
 806
 807 static bool possible_sbyte(operand *o)
 808 {
 809     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 810         !(o->opflags & OPFLAG_UNKNOWN) &&
 811         optimizing >= 0 && !(o->type & STRICT);
 812 }
 813
 814 /* check that opn[op]  is a signed byte of size 16 or 32 */
 815 static bool is_sbyte16(operand *o)
 816 {
 817     int16_t v;
 818
 819     if (!possible_sbyte(o))
 820         return false;
 821
 822     v = o->offset;
 823     return v >= -128 && v <= 127;
 824 }
 825
 826 static bool is_sbyte32(operand *o)
 827 {
 828     int32_t v;
 829
 830     if (!possible_sbyte(o))
 831         return false;
 832
 833     v = o->offset;
 834     return v >= -128 && v <= 127;
 835 }
 836
/*
 * Common construct: expands to four consecutive case labels,
 * (x), (x)+1, (x)+2 and (x)+3.  Written as `case4(010):' inside a
 * switch; the caller supplies the final colon.
 */
#define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 839
 840 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 841                         insn * ins, const uint8_t *codes)
 842 {
 843     int64_t length = 0;
 844     uint8_t c;
 845     int rex_mask = ~0;
 846     int op1, op2;
 847     struct operand *opx;
 848     uint8_t opex = 0;
 849
 850     ins->rex = 0;               /* Ensure REX is reset */
 851
 852     if (ins->prefixes[PPS_OSIZE] == P_O64)
 853         ins->rex |= REX_W;
 854
 855     (void)segment;              /* Don't warn that this parameter is unused */
 856     (void)offset;               /* Don't warn that this parameter is unused */
 857
 858     while (*codes) {
 859         c = *codes++;
 860         op1 = (c & 3) + ((opex & 1) << 2);
 861         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 862         opx = &ins->oprs[op1];
 863         opex = 0;               /* For the next iteration */
 864
 865         switch (c) {
 866         case 01:
 867         case 02:
 868         case 03:
 869         case 04:
 870             codes += c, length += c;
 871             break;
 872
 873         case 05:
 874         case 06:
 875         case 07:
 876             opex = c;
 877             break;
 878
 879         case4(010):
 880             ins->rex |=
 881                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 882             codes++, length++;
 883             break;
 884
 885         case4(014):
 886         case4(020):
 887         case4(024):
 888             length++;
 889             break;
 890
 891         case4(030):
 892             length += 2;
 893             break;
 894
 895         case4(034):
 896             if (opx->type & (BITS16 | BITS32 | BITS64))
 897                 length += (opx->type & BITS16) ? 2 : 4;
 898             else
 899                 length += (bits == 16) ? 2 : 4;
 900             break;
 901
 902         case4(040):
 903             length += 4;
 904             break;
 905
 906         case4(044):
 907             length += ins->addr_size >> 3;
 908             break;
 909
 910         case4(050):
 911             length++;
 912             break;
 913
 914         case4(054):
 915             length += 8; /* MOV reg64/imm */
 916             break;
 917
 918         case4(060):
 919             length += 2;
 920             break;
 921
 922         case4(064):
 923             if (opx->type & (BITS16 | BITS32 | BITS64))
 924                 length += (opx->type & BITS16) ? 2 : 4;
 925             else
 926                 length += (bits == 16) ? 2 : 4;
 927             break;
 928
 929         case4(070):
 930             length += 4;
 931             break;
 932
 933         case4(074):
 934             length += 2;
 935             break;
 936
 937         case4(0140):
 938             length += is_sbyte16(opx) ? 1 : 2;
 939             break;
 940
 941         case4(0144):
 942             codes++;
 943             length++;
 944             break;
 945
 946         case4(0150):
 947             length += is_sbyte32(opx) ? 1 : 4;
 948             break;
 949
 950         case4(0154):
 951             codes++;
 952             length++;
 953             break;
 954
 955         case4(0160):
 956             length++;
 957             ins->rex |= REX_D;
 958             ins->drexdst = regval(opx);
 959             break;
 960
 961         case4(0164):
 962             length++;
 963             ins->rex |= REX_D|REX_OC;
 964             ins->drexdst = regval(opx);
 965             break;
 966
 967         case 0171:
 968             break;
 969
 970         case 0172:
 971         case 0173:
 972         case 0174:
 973             codes++;
 974             length++;
 975             break;
 976
 977         case4(0250):
 978             length += is_sbyte32(opx) ? 1 : 4;
 979             break;
 980
 981         case4(0254):
 982             length += 4;
 983             break;
 984
 985         case4(0260):
 986             ins->rex |= REX_V;
 987             ins->drexdst = regval(opx);
 988             ins->vex_cm = *codes++;
 989             ins->vex_wlp = *codes++;
 990             break;
 991
 992         case 0270:
 993             ins->rex |= REX_V;
 994             ins->drexdst = 0;
 995             ins->vex_cm = *codes++;
 996             ins->vex_wlp = *codes++;
 997             break;
 998
 999         case4(0274):
1000             length++;
1001             break;
1002
1003         case4(0300):
1004             break;
1005
1006         case 0310:
1007             if (bits == 64)
1008                 return -1;
1009             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
1010             break;
1011
1012         case 0311:
1013             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
1014             break;
1015
1016         case 0312:
1017             break;
1018
1019         case 0313:
1020             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
1021                 has_prefix(ins, PPS_ASIZE, P_A32))
1022                 return -1;
1023             break;
1024
1025         case4(0314):
1026             break;
1027
1028         case 0320:
1029             length += (bits != 16);
1030             break;
1031
1032         case 0321:
1033             length += (bits == 16);
1034             break;
1035
1036         case 0322:
1037             break;
1038
1039         case 0323:
1040             rex_mask &= ~REX_W;
1041             break;
1042
1043         case 0324:
1044             ins->rex |= REX_W;
1045             break;
1046
1047         case 0325:
1048             ins->rex |= REX_NH;
1049             break;
1050
1051         case 0330:
1052             codes++, length++;
1053             break;
1054
1055         case 0331:
1056             break;
1057
1058         case 0332:
1059         case 0333:
1060             length++;
1061             break;
1062
1063         case 0334:
1064             ins->rex |= REX_L;
1065             break;
1066
1067         case 0335:
1068             break;
1069
1070         case 0336:
1071             if (!ins->prefixes[PPS_LREP])
1072                 ins->prefixes[PPS_LREP] = P_REP;
1073             break;
1074
1075         case 0337:
1076             if (!ins->prefixes[PPS_LREP])
1077                 ins->prefixes[PPS_LREP] = P_REPNE;
1078             break;
1079
1080         case 0340:
1081             if (ins->oprs[0].segment != NO_SEG)
1082                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1083                         " quantity of BSS space");
1084             else
1085                 length += ins->oprs[0].offset;
1086             break;
1087
1088         case 0341:
1089             if (!ins->prefixes[PPS_WAIT])
1090                 ins->prefixes[PPS_WAIT] = P_WAIT;
1091             break;
1092
1093         case4(0344):
1094             length++;
1095             break;
1096
1097         case 0360:
1098             break;
1099
1100         case 0361:
1101         case 0362:
1102         case 0363:
1103             length++;
1104             break;
1105
1106         case 0364:
1107         case 0365:
1108             break;
1109
1110         case 0366:
1111         case 0367:
1112             length++;
1113             break;
1114
1115         case 0370:
1116         case 0371:
1117         case 0372:
1118             break;
1119
1120         case 0373:
1121             length++;
1122             break;
1123
1124         case4(0100):
1125         case4(0110):
1126         case4(0120):
1127         case4(0130):
1128         case4(0200):
1129         case4(0204):
1130         case4(0210):
1131         case4(0214):
1132         case4(0220):
1133         case4(0224):
1134         case4(0230):
1135         case4(0234):
1136             {
1137                 ea ea_data;
1138                 int rfield;
1139                 int32_t rflags;
1140                 struct operand *opy = &ins->oprs[op2];
1141
1142                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1143
1144                 if (c <= 0177) {
1145                     /* pick rfield from operand b (opx) */
1146                     rflags = regflag(opx);
1147                     rfield = nasm_regvals[opx->basereg];
1148                 } else {
1149                     rflags = 0;
1150                     rfield = c & 7;
1151                 }
1152                 if (!process_ea(opy, &ea_data, bits,
1153                                 ins->addr_size, rfield, rflags)) {
1154                     errfunc(ERR_NONFATAL, "invalid effective address");
1155                     return -1;
1156                 } else {
1157                     ins->rex |= ea_data.rex;
1158                     length += ea_data.size;
1159                 }
1160             }
1161             break;
1162
1163         default:
1164             errfunc(ERR_PANIC, "internal instruction table corrupt"
1165                     ": instruction code \\%o (0x%02X) given", c, c);
1166             break;
1167         }
1168     }
1169
1170     ins->rex &= rex_mask;
1171
1172     if (ins->rex & REX_NH) {
1173         if (ins->rex & REX_H) {
1174             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1175             return -1;
1176         }
1177         ins->rex &= ~REX_P;     /* Don't force REX prefix due to high reg */
1178     }
1179
1180     if (ins->rex & REX_V) {
1181         int bad32 = REX_R|REX_W|REX_X|REX_B;
1182
1183         if (ins->rex & REX_H) {
1184             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1185             return -1;
1186         }
1187         switch (ins->vex_wlp & 030) {
1188         case 000:
1189         case 020:
1190             ins->rex &= ~REX_W;
1191             break;
1192         case 010:
1193             ins->rex |= REX_W;
1194             bad32 &= ~REX_W;
1195             break;
1196         case 030:
1197             /* Follow REX_W */
1198             break;
1199         }
1200
1201         if (bits != 64 && ((ins->rex & bad32) || ins->drexdst > 7)) {
1202             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1203             return -1;
1204         }
1205         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_R|REX_B)))
1206             length += 3;
1207         else
1208             length += 2;
1209     } else if (ins->rex & REX_D) {
1210         if (ins->rex & REX_H) {
1211             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1212             return -1;
1213         }
1214         if (bits != 64 && ((ins->rex & (REX_R|REX_W|REX_X|REX_B)) ||
1215                            ins->drexdst > 7)) {
1216             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1217             return -1;
1218         }
1219         length++;
1220     } else if (ins->rex & REX_REAL) {
1221         if (ins->rex & REX_H) {
1222             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1223             return -1;
1224         } else if (bits == 64) {
1225             length++;
1226         } else if ((ins->rex & REX_L) &&
1227                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1228                    cpu >= IF_X86_64) {
1229             /* LOCK-as-REX.R */
1230             assert_no_prefix(ins, PPS_LREP);
1231             length++;
1232         } else {
1233             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1234             return -1;
1235         }
1236     }
1237
1238     return length;
1239 }
1240
/*
 * Emit the pending REX prefix byte, if any.  Nothing is emitted unless
 * we are in 64-bit mode, ins->rex has at least one REX_REAL bit set and
 * neither REX_D nor REX_V is set.  After emitting, ins->rex is cleared
 * so the prefix is written only once, and offset is advanced by one.
 *
 * Relies on `ins', `bits', `offset' and `segment' being in scope at
 * the point of use.  Wrapped in do { } while (0) so that `EMIT_REX();'
 * behaves as a single statement, including inside if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & (REX_D|REX_V)) && (ins->rex & REX_REAL) &&     \
            (bits == 64)) {                                             \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                     \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG); \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1248
1249 static void gencode(int32_t segment, int64_t offset, int bits,
1250                     insn * ins, const struct itemplate *temp,
1251                     int64_t insn_end)
1252 {
1253     static char condval[] = {   /* conditional opcodes */
1254         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1255         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1256         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1257     };
1258     uint8_t c;
1259     uint8_t bytes[4];
1260     int64_t size;
1261     int64_t data;
1262     int op1, op2;
1263     struct operand *opx;
1264     const uint8_t *codes = temp->code;
1265     uint8_t opex = 0;
1266
1267     while (*codes) {
1268         c = *codes++;
1269         op1 = (c & 3) + ((opex & 1) << 2);
1270         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1271         opx = &ins->oprs[op1];
1272         opex = 0;               /* For the next iteration */
1273
1274         switch (c) {
1275         case 01:
1276         case 02:
1277         case 03:
1278         case 04:
1279             EMIT_REX();
1280             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1281             codes += c;
1282             offset += c;
1283             break;
1284
1285         case 05:
1286         case 06:
1287         case 07:
1288             opex = c;
1289             break;
1290
1291         case4(010):
1292             EMIT_REX();
1293             bytes[0] = *codes++ + (regval(opx) & 7);
1294             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1295             offset += 1;
1296             break;
1297
1298         case4(014):
1299             /* The test for BITS8 and SBYTE here is intended to avoid
1300                warning on optimizer actions due to SBYTE, while still
1301                warn on explicit BYTE directives.  Also warn, obviously,
1302                if the optimizer isn't enabled. */
1303             if (((opx->type & BITS8) ||
1304                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1305                 (opx->offset < -128 || opx->offset > 127)) {
1306                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1307                         "signed byte value exceeds bounds");
1308             }
1309             if (opx->segment != NO_SEG) {
1310                 data = opx->offset;
1311                 out(offset, segment, &data, OUT_ADDRESS, 1,
1312                     opx->segment, opx->wrt);
1313             } else {
1314                 bytes[0] = opx->offset;
1315                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1316                     NO_SEG);
1317             }
1318             offset += 1;
1319             break;
1320
1321         case4(020):
1322             if (opx->offset < -256 || opx->offset > 255) {
1323                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1324                         "byte value exceeds bounds");
1325             }
1326             if (opx->segment != NO_SEG) {
1327                 data = opx->offset;
1328                 out(offset, segment, &data, OUT_ADDRESS, 1,
1329                     opx->segment, opx->wrt);
1330             } else {
1331                 bytes[0] = opx->offset;
1332                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1333                     NO_SEG);
1334             }
1335             offset += 1;
1336             break;
1337
1338         case4(024):
1339             if (opx->offset < 0 || opx->offset > 255)
1340                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1341                         "unsigned byte value exceeds bounds");
1342             if (opx->segment != NO_SEG) {
1343                 data = opx->offset;
1344                 out(offset, segment, &data, OUT_ADDRESS, 1,
1345                     opx->segment, opx->wrt);
1346             } else {
1347                 bytes[0] = opx->offset;
1348                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1349                     NO_SEG);
1350             }
1351             offset += 1;
1352             break;
1353
1354         case4(030):
1355             warn_overflow(2, opx);
1356             data = opx->offset;
1357             out(offset, segment, &data, OUT_ADDRESS, 2,
1358                 opx->segment, opx->wrt);
1359             offset += 2;
1360             break;
1361
1362         case4(034):
1363             if (opx->type & (BITS16 | BITS32))
1364                 size = (opx->type & BITS16) ? 2 : 4;
1365             else
1366                 size = (bits == 16) ? 2 : 4;
1367             warn_overflow(size, opx);
1368             data = opx->offset;
1369             out(offset, segment, &data, OUT_ADDRESS, size,
1370                 opx->segment, opx->wrt);
1371             offset += size;
1372             break;
1373
1374         case4(040):
1375             warn_overflow(4, opx);
1376             data = opx->offset;
1377             out(offset, segment, &data, OUT_ADDRESS, 4,
1378                 opx->segment, opx->wrt);
1379             offset += 4;
1380             break;
1381
1382         case4(044):
1383             data = opx->offset;
1384             size = ins->addr_size >> 3;
1385             warn_overflow(size, opx);
1386             out(offset, segment, &data, OUT_ADDRESS, size,
1387                 opx->segment, opx->wrt);
1388             offset += size;
1389             break;
1390
1391         case4(050):
1392             if (opx->segment != segment)
1393                 errfunc(ERR_NONFATAL,
1394                         "short relative jump outside segment");
1395             data = opx->offset - insn_end;
1396             if (data > 127 || data < -128)
1397                 errfunc(ERR_NONFATAL, "short jump is out of range");
1398             bytes[0] = data;
1399             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1400             offset += 1;
1401             break;
1402
1403         case4(054):
1404             data = (int64_t)opx->offset;
1405             out(offset, segment, &data, OUT_ADDRESS, 8,
1406                 opx->segment, opx->wrt);
1407             offset += 8;
1408             break;
1409
1410         case4(060):
1411             if (opx->segment != segment) {
1412                 data = opx->offset;
1413                 out(offset, segment, &data,
1414                     OUT_REL2ADR, insn_end - offset,
1415                     opx->segment, opx->wrt);
1416             } else {
1417                 data = opx->offset - insn_end;
1418                 out(offset, segment, &data,
1419                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1420             }
1421             offset += 2;
1422             break;
1423
1424         case4(064):
1425             if (opx->type & (BITS16 | BITS32 | BITS64))
1426                 size = (opx->type & BITS16) ? 2 : 4;
1427             else
1428                 size = (bits == 16) ? 2 : 4;
1429             if (opx->segment != segment) {
1430                 data = opx->offset;
1431                 out(offset, segment, &data,
1432                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1433                     insn_end - offset, opx->segment, opx->wrt);
1434             } else {
1435                 data = opx->offset - insn_end;
1436                 out(offset, segment, &data,
1437                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1438             }
1439             offset += size;
1440             break;
1441
1442         case4(070):
1443             if (opx->segment != segment) {
1444                 data = opx->offset;
1445                 out(offset, segment, &data,
1446                     OUT_REL4ADR, insn_end - offset,
1447                     opx->segment, opx->wrt);
1448             } else {
1449                 data = opx->offset - insn_end;
1450                 out(offset, segment, &data,
1451                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1452             }
1453             offset += 4;
1454             break;
1455
1456         case4(074):
1457             if (opx->segment == NO_SEG)
1458                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1459                         " relocatable");
1460             data = 0;
1461             out(offset, segment, &data, OUT_ADDRESS, 2,
1462                 outfmt->segbase(1 + opx->segment),
1463                 opx->wrt);
1464             offset += 2;
1465             break;
1466
1467         case4(0140):
1468             data = opx->offset;
1469             warn_overflow(2, opx);
1470             if (is_sbyte16(opx)) {
1471                 bytes[0] = data;
1472                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1473                     NO_SEG);
1474                 offset++;
1475             } else {
1476                 out(offset, segment, &data, OUT_ADDRESS, 2,
1477                     opx->segment, opx->wrt);
1478                 offset += 2;
1479             }
1480             break;
1481
1482         case4(0144):
1483             EMIT_REX();
1484             bytes[0] = *codes++;
1485             if (is_sbyte16(opx))
1486                 bytes[0] |= 2;  /* s-bit */
1487             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1488             offset++;
1489             break;
1490
1491         case4(0150):
1492             data = opx->offset;
1493             warn_overflow(4, opx);
1494             if (is_sbyte32(opx)) {
1495                 bytes[0] = data;
1496                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1497                     NO_SEG);
1498                 offset++;
1499             } else {
1500                 out(offset, segment, &data, OUT_ADDRESS, 4,
1501                     opx->segment, opx->wrt);
1502                 offset += 4;
1503             }
1504             break;
1505
1506         case4(0154):
1507             EMIT_REX();
1508             bytes[0] = *codes++;
1509             if (is_sbyte32(opx))
1510                 bytes[0] |= 2;  /* s-bit */
1511             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1512             offset++;
1513             break;
1514
1515         case4(0160):
1516         case4(0164):
1517             break;
1518
1519         case 0171:
1520             bytes[0] =
1521                 (ins->drexdst << 4) |
1522                 (ins->rex & REX_OC ? 0x08 : 0) |
1523                 (ins->rex & (REX_R|REX_X|REX_B));
1524             ins->rex = 0;
1525             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1526             offset++;
1527             break;
1528
1529         case 0172:
1530             c = *codes++;
1531             opx = &ins->oprs[c >> 3];
1532             bytes[0] = nasm_regvals[opx->basereg] << 4;
1533             opx = &ins->oprs[c & 7];
1534             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1535                 errfunc(ERR_NONFATAL,
1536                         "non-absolute expression not permitted as argument %d",
1537                         c & 7);
1538             } else {
1539                 if (opx->offset & ~15) {
1540                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1541                             "four-bit argument exceeds bounds");
1542                 }
1543                 bytes[0] |= opx->offset & 15;
1544             }
1545             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1546             offset++;
1547             break;
1548
1549         case 0173:
1550             c = *codes++;
1551             opx = &ins->oprs[c >> 4];
1552             bytes[0] = nasm_regvals[opx->basereg] << 4;
1553             bytes[0] |= c & 15;
1554             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1555             offset++;
1556             break;
1557
1558         case 0174:
1559             c = *codes++;
1560             opx = &ins->oprs[c];
1561             bytes[0] = nasm_regvals[opx->basereg] << 4;
1562             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1563             offset++;
1564             break;
1565
1566         case4(0250):
1567             data = opx->offset;
1568             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1569                 (int32_t)data != (int64_t)data) {
1570                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1571                         "signed dword immediate exceeds bounds");
1572             }
1573             if (is_sbyte32(opx)) {
1574                 bytes[0] = data;
1575                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1576                     NO_SEG);
1577                 offset++;
1578             } else {
1579                 out(offset, segment, &data, OUT_ADDRESS, 4,
1580                     opx->segment, opx->wrt);
1581                 offset += 4;
1582             }
1583             break;
1584
1585         case4(0254):
1586             data = opx->offset;
1587             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1588                 (int32_t)data != (int64_t)data) {
1589                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1590                         "signed dword immediate exceeds bounds");
1591             }
1592             out(offset, segment, &data, OUT_ADDRESS, 4,
1593                 opx->segment, opx->wrt);
1594             offset += 4;
1595             break;
1596
1597         case4(0260):
1598         case 0270:
1599             codes += 2;
1600             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1601                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1602                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1603                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1604                     ((~ins->drexdst & 15)<< 3) | (ins->vex_wlp & 07);
1605                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1606                 offset += 3;
1607             } else {
1608                 bytes[0] = 0xc5;
1609                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1610                     ((~ins->drexdst & 15) << 3) | (ins->vex_wlp & 07);
1611                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1612                 offset += 2;
1613             }
1614             break;
1615
1616         case4(0274):
1617         {
1618             uint64_t uv, um;
1619             int s;
1620
1621             if (ins->rex & REX_W)
1622                 s = 64;
1623             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1624                 s = 16;
1625             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1626                 s = 32;
1627             else
1628                 s = bits;
1629
1630             um = (uint64_t)2 << (s-1);
1631             uv = opx->offset;
1632
1633             if (uv > 127 && uv < (uint64_t)-128 &&
1634                 (uv < um-128 || uv > um-1)) {
1635                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1636                         "signed byte value exceeds bounds");
1637             }
1638             if (opx->segment != NO_SEG) {
1639                 data = uv;
1640                 out(offset, segment, &data, OUT_ADDRESS, 1,
1641                     opx->segment, opx->wrt);
1642             } else {
1643                 bytes[0] = uv;
1644                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1645                     NO_SEG);
1646             }
1647             offset += 1;
1648             break;
1649         }
1650
1651         case4(0300):
1652             break;
1653
1654         case 0310:
1655             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1656                 *bytes = 0x67;
1657                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1658                 offset += 1;
1659             } else
1660                 offset += 0;
1661             break;
1662
1663         case 0311:
1664             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1665                 *bytes = 0x67;
1666                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1667                 offset += 1;
1668             } else
1669                 offset += 0;
1670             break;
1671
1672         case 0312:
1673             break;
1674
1675         case 0313:
1676             ins->rex = 0;
1677             break;
1678
1679         case4(0314):
1680             break;
1681
1682         case 0320:
1683             if (bits != 16) {
1684                 *bytes = 0x66;
1685                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1686                 offset += 1;
1687             } else
1688                 offset += 0;
1689             break;
1690
1691         case 0321:
1692             if (bits == 16) {
1693                 *bytes = 0x66;
1694                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1695                 offset += 1;
1696             } else
1697                 offset += 0;
1698             break;
1699
1700         case 0322:
1701         case 0323:
1702             break;
1703
1704         case 0324:
1705             ins->rex |= REX_W;
1706             break;
1707
1708         case 0325:
1709             break;
1710
1711         case 0330:
1712             *bytes = *codes++ ^ condval[ins->condition];
1713             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1714             offset += 1;
1715             break;
1716
1717         case 0331:
1718             break;
1719
1720         case 0332:
1721         case 0333:
1722             *bytes = c - 0332 + 0xF2;
1723             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1724             offset += 1;
1725             break;
1726
1727         case 0334:
1728             if (ins->rex & REX_R) {
1729                 *bytes = 0xF0;
1730                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1731                 offset += 1;
1732             }
1733             ins->rex &= ~(REX_L|REX_R);
1734             break;
1735
1736         case 0335:
1737             break;
1738
1739         case 0336:
1740         case 0337:
1741             break;
1742
1743         case 0340:
1744             if (ins->oprs[0].segment != NO_SEG)
1745                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1746             else {
1747                 int64_t size = ins->oprs[0].offset;
1748                 if (size > 0)
1749                     out(offset, segment, NULL,
1750                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1751                 offset += size;
1752             }
1753             break;
1754
1755         case 0341:
1756             break;
1757
1758         case 0344:
1759         case 0345:
1760             bytes[0] = c & 1;
1761             switch (ins->oprs[0].basereg) {
1762             case R_CS:
1763                 bytes[0] += 0x0E;
1764                 break;
1765             case R_DS:
1766                 bytes[0] += 0x1E;
1767                 break;
1768             case R_ES:
1769                 bytes[0] += 0x06;
1770                 break;
1771             case R_SS:
1772                 bytes[0] += 0x16;
1773                 break;
1774             default:
1775                 errfunc(ERR_PANIC,
1776                         "bizarre 8086 segment register received");
1777             }
1778             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1779             offset++;
1780             break;
1781
1782         case 0346:
1783         case 0347:
1784             bytes[0] = c & 1;
1785             switch (ins->oprs[0].basereg) {
1786             case R_FS:
1787                 bytes[0] += 0xA0;
1788                 break;
1789             case R_GS:
1790                 bytes[0] += 0xA8;
1791                 break;
1792             default:
1793                 errfunc(ERR_PANIC,
1794                         "bizarre 386 segment register received");
1795             }
1796             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1797             offset++;
1798             break;
1799
1800         case 0360:
1801             break;
1802
1803         case 0361:
1804             bytes[0] = 0x66;
1805             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1806             offset += 1;
1807             break;
1808
1809         case 0362:
1810         case 0363:
1811             bytes[0] = c - 0362 + 0xf2;
1812             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1813             offset += 1;
1814             break;
1815
1816         case 0364:
1817         case 0365:
1818             break;
1819
1820         case 0366:
1821         case 0367:
1822             *bytes = c - 0366 + 0x66;
1823             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1824             offset += 1;
1825             break;
1826
1827         case 0370:
1828         case 0371:
1829         case 0372:
1830             break;
1831
1832         case 0373:
1833             *bytes = bits == 16 ? 3 : 5;
1834             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1835             offset += 1;
1836             break;
1837
1838         case4(0100):
1839         case4(0110):
1840         case4(0120):
1841         case4(0130):
1842         case4(0200):
1843         case4(0204):
1844         case4(0210):
1845         case4(0214):
1846         case4(0220):
1847         case4(0224):
1848         case4(0230):
1849         case4(0234):
1850             {
1851                 ea ea_data;
1852                 int rfield;
1853                 int32_t rflags;
1854                 uint8_t *p;
1855                 int32_t s;
1856                 enum out_type type;
1857                 struct operand *opy = &ins->oprs[op2];
1858
1859                 if (c <= 0177) {
1860                     /* pick rfield from operand b (opx) */
1861                     rflags = regflag(opx);
1862                     rfield = nasm_regvals[opx->basereg];
1863                 } else {
1864                     /* rfield is constant */
1865                     rflags = 0;
1866                     rfield = c & 7;
1867                 }
1868
1869                 if (!process_ea(opy, &ea_data, bits, ins->addr_size,
1870                                 rfield, rflags)) {
1871                     errfunc(ERR_NONFATAL, "invalid effective address");
1872                 }
1873
1874
1875                 p = bytes;
1876                 *p++ = ea_data.modrm;
1877                 if (ea_data.sib_present)
1878                     *p++ = ea_data.sib;
1879
1880                 /* DREX suffixes come between the SIB and the displacement */
1881                 if (ins->rex & REX_D) {
1882                     *p++ = (ins->drexdst << 4) |
1883                            (ins->rex & REX_OC ? 0x08 : 0) |
1884                            (ins->rex & (REX_R|REX_X|REX_B));
1885                     ins->rex = 0;
1886                 }
1887
1888                 s = p - bytes;
1889                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1890
1891                 /*
1892                  * Make sure the address gets the right offset in case
1893                  * the line breaks in the .lst file (BR 1197827)
1894                  */
1895                 offset += s;
1896                 s = 0;
1897
1898                 switch (ea_data.bytes) {
1899                 case 0:
1900                     break;
1901                 case 1:
1902                 case 2:
1903                 case 4:
1904                 case 8:
1905                     data = opy->offset;
1906                     warn_overflow(ea_data.bytes, opy);
1907                     s += ea_data.bytes;
1908                     if (ea_data.rip) {
1909                         if (opy->segment == segment) {
1910                             data -= insn_end;
1911                             out(offset, segment, &data, OUT_ADDRESS,
1912                                 ea_data.bytes, NO_SEG, NO_SEG);
1913                         } else {
1914                             out(offset, segment, &data, OUT_REL4ADR,
1915                                 insn_end - offset, opy->segment, opy->wrt);
1916                         }
1917                     } else {
1918                         type = OUT_ADDRESS;
1919                         out(offset, segment, &data, OUT_ADDRESS,
1920                             ea_data.bytes, opy->segment, opy->wrt);
1921                     }
1922                     break;
1923                 default:
1924                     /* Impossible! */
1925                     errfunc(ERR_PANIC,
1926                             "Invalid amount of bytes (%d) for offset?!",
1927                             ea_data.bytes);
1928                     break;
1929                 }
1930                 offset += s;
1931             }
1932             break;
1933
1934         default:
1935             errfunc(ERR_PANIC, "internal instruction table corrupt"
1936                     ": instruction code \\%o (0x%02X) given", c, c);
1937             break;
1938         }
1939     }
1940 }
1941
1942 static int32_t regflag(const operand * o)
1943 {
1944     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1945         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1946     }
1947     return nasm_reg_flags[o->basereg];
1948 }
1949
1950 static int32_t regval(const operand * o)
1951 {
1952     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1953         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1954     }
1955     return nasm_regvals[o->basereg];
1956 }
1957
1958 static int op_rexflags(const operand * o, int mask)
1959 {
1960     int32_t flags;
1961     int val;
1962
1963     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1964         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1965     }
1966
1967     flags = nasm_reg_flags[o->basereg];
1968     val = nasm_regvals[o->basereg];
1969
1970     return rexflags(val, flags, mask);
1971 }
1972
1973 static int rexflags(int val, int32_t flags, int mask)
1974 {
1975     int rex = 0;
1976
1977     if (val >= 8)
1978         rex |= REX_B|REX_X|REX_R;
1979     if (flags & BITS64)
1980         rex |= REX_W;
1981     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1982         rex |= REX_H;
1983     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1984         rex |= REX_P;
1985
1986     return rex & mask;
1987 }
1988
1989 static enum match_result matches(const struct itemplate *itemp,
1990                                  insn *instruction, int bits)
1991 {
1992     int i, size[MAX_OPERANDS], asize, oprs, ret;
1993
1994     ret = MOK_GOOD;
1995
1996     /*
1997      * Check the opcode
1998      */
1999     if (itemp->opcode != instruction->opcode)
2000         return MERR_INVALOP;
2001
2002     /*
2003      * Count the operands
2004      */
2005     if (itemp->operands != instruction->operands)
2006         return MERR_INVALOP;
2007
2008     /*
2009      * Check that no spurious colons or TOs are present
2010      */
2011     for (i = 0; i < itemp->operands; i++)
2012         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2013             return MERR_INVALOP;
2014
2015     /*
2016      * Process size flags
2017      */
2018     if (itemp->flags & IF_ARMASK) {
2019         memset(size, 0, sizeof size);
2020
2021         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2022
2023         switch (itemp->flags & IF_SMASK) {
2024         case IF_SB:
2025             size[i] = BITS8;
2026             break;
2027         case IF_SW:
2028             size[i] = BITS16;
2029             break;
2030         case IF_SD:
2031             size[i] = BITS32;
2032             break;
2033         case IF_SQ:
2034             size[i] = BITS64;
2035             break;
2036         case IF_SO:
2037             size[i] = BITS128;
2038             break;
2039         case IF_SY:
2040             size[i] = BITS256;
2041             break;
2042         case IF_SZ:
2043             switch (bits) {
2044             case 16:
2045                 size[i] = BITS16;
2046                 break;
2047             case 32:
2048                 size[i] = BITS32;
2049                 break;
2050             case 64:
2051                 size[i] = BITS64;
2052                 break;
2053             }
2054             break;
2055         default:
2056             break;
2057         }
2058     } else {
2059         asize = 0;
2060         switch (itemp->flags & IF_SMASK) {
2061         case IF_SB:
2062             asize = BITS8;
2063             break;
2064         case IF_SW:
2065             asize = BITS16;
2066             break;
2067         case IF_SD:
2068             asize = BITS32;
2069             break;
2070         case IF_SQ:
2071             asize = BITS64;
2072             break;
2073         case IF_SO:
2074             asize = BITS128;
2075             break;
2076         case IF_SY:
2077             asize = BITS256;
2078             break;
2079         case IF_SZ:
2080             switch (bits) {
2081             case 16:
2082                 asize = BITS16;
2083                 break;
2084             case 32:
2085                 asize = BITS32;
2086                 break;
2087             case 64:
2088                 asize = BITS64;
2089                 break;
2090             }
2091             break;
2092         default:
2093             break;
2094         }
2095         for (i = 0; i < MAX_OPERANDS; i++)
2096             size[i] = asize;
2097     }
2098
2099     /*
2100      * Check that the operand flags all match up
2101      */
2102     for (i = 0; i < itemp->operands; i++) {
2103         int32_t type = instruction->oprs[i].type;
2104         if (!(type & SIZE_MASK))
2105             type |= size[i];
2106
2107         if (itemp->opd[i] & SAME_AS) {
2108             int j = itemp->opd[i] & ~SAME_AS;
2109             if (type != instruction->oprs[j].type ||
2110                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2111                 return MERR_INVALOP;
2112         } else if (itemp->opd[i] & ~type ||
2113             ((itemp->opd[i] & SIZE_MASK) &&
2114              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2115             if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
2116                 (type & SIZE_MASK))
2117                 return MERR_INVALOP;
2118             else
2119                 return MERR_OPSIZEMISSING;
2120         }
2121     }
2122
2123     /*
2124      * Check operand sizes
2125      */
2126     if (itemp->flags & (IF_SM | IF_SM2)) {
2127         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2128         asize = 0;
2129         for (i = 0; i < oprs; i++) {
2130             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
2131                 int j;
2132                 for (j = 0; j < oprs; j++)
2133                     size[j] = asize;
2134                 break;
2135             }
2136         }
2137     } else {
2138         oprs = itemp->operands;
2139     }
2140
2141     for (i = 0; i < itemp->operands; i++) {
2142         if (!(itemp->opd[i] & SIZE_MASK) &&
2143             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2144             return MERR_OPSIZEMISMATCH;
2145     }
2146
2147     /*
2148      * Check template is okay at the set cpu level
2149      */
2150     if (((itemp->flags & IF_PLEVEL) > cpu))
2151         return MERR_BADCPU;
2152
2153     /*
2154      * Verify the appropriate long mode flag.
2155      */
2156     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2157         return MERR_BADMODE;
2158
2159     /*
2160      * Check if special handling needed for Jumps
2161      */
2162     if ((uint8_t)(itemp->code[0]) >= 0370)
2163         return MOK_JUMP;
2164
2165     return ret;
2166 }
2167
/*
 * Work out how the operand `input` is encoded as an effective address
 * and record the result in `output`.
 *
 *   input    - operand to encode (register-direct or memory reference)
 *   output   - receives modrm, sib, sib_present, bytes, rip and size;
 *              REX flag bits are OR-ed into output->rex, whose previous
 *              contents are therefore preserved (it is never cleared here)
 *   bits     - only compared against 64 in this function
 *   addrbits - address size used to validate and select the EA form
 *   rfield   - value whose low three bits go into bits 3..5 of modrm;
 *              also passed to rexflags() together with rflags
 *   rflags   - flag word accompanying rfield
 *
 * Returns `output` on success, or NULL when the operand cannot be
 * encoded as an effective address.
 */
2168 static ea *process_ea(operand * input, ea * output, int bits,
2169                       int addrbits, int rfield, int32_t rflags)
2170 {
     /* When set, the zero- and one-byte displacement forms below are not
      * selected automatically (EAF_BYTEOFFS can still force one byte). */
2171     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2172
2173     output->rip = false;
2174
2175     /* REX flags for the rfield operand */
2176     output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);
2177
2178     if (!(REGISTER & ~input->type)) {   /* register direct */
2179         int i;
2180         int32_t f;
2181
2182         if (input->basereg < EXPR_REG_START /* Verify as Register */
2183             || input->basereg >= REG_ENUM_LIMIT)
2184             return NULL;
2185         f = regflag(input);
2186         i = nasm_regvals[input->basereg];
2187
2188         if (REG_EA & ~f)
2189             return NULL;        /* Invalid EA register */
2190
2191         output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);
2192
2193         output->sib_present = false;             /* no SIB necessary */
2194         output->bytes = 0;  /* no offset necessary either */
2195         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2196     } else {                    /* it's a memory reference */
2197         if (input->basereg == -1
2198             && (input->indexreg == -1 || input->scale == 0)) {
2199             /* it's a pure offset */
                 /* 64-bit mode without IP_REL in the operand type: emit a
                  * SIB byte (index=4, base=5) with a 4-byte displacement;
                  * otherwise use the plain offset form, flagged as
                  * RIP-relative when bits == 64. */
2200             if (bits == 64 && (~input->type & IP_REL)) {
2201               int scale, index, base;
2202               output->sib_present = true;
2203               scale = 0;
2204               index = 4;
2205               base = 5;
2206               output->sib = (scale << 6) | (index << 3) | base;
2207               output->bytes = 4;
2208               output->modrm = 4 | ((rfield & 7) << 3);
2209               output->rip = false;
2210             } else {
2211               output->sib_present = false;
2212               output->bytes = (addrbits != 16 ? 4 : 2);
2213               output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2214               output->rip = bits == 64;
2215             }
2216         } else {                /* it's an indirection */
2217             int i = input->indexreg, b = input->basereg, s = input->scale;
2218             int32_t o = input->offset, seg = input->segment;
2219             int hb = input->hintbase, ht = input->hinttype;
2220             int t;
2221             int it, bt;
2222             int32_t ix, bx;     /* register flags */
2223
2224             if (s == 0)
2225                 i = -1;         /* make this easy, at least */
2226
                 /* Map index and base register ids to their value (it/bt)
                  * and flag word (ix/bx); -1 and 0 stand for "no register". */
2227             if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
2228                 it = nasm_regvals[i];
2229                 ix = nasm_reg_flags[i];
2230             } else {
2231                 it = -1;
2232                 ix = 0;
2233             }
2234
2235             if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
2236                 bt = nasm_regvals[b];
2237                 bx = nasm_reg_flags[b];
2238             } else {
2239                 bt = -1;
2240                 bx = 0;
2241             }
2242
2243             /* check for a 32/64-bit memory reference... */
2244             if ((ix|bx) & (BITS32|BITS64)) {
2245                 /* it must be a 32/64-bit memory reference. Firstly we have
2246                  * to check that all registers involved are type E/Rxx. */
2247                 int32_t sok = BITS32|BITS64;
2248
2249                 if (it != -1) {
2250                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2251                         sok &= ix;
2252                     else
2253                         return NULL;
2254                 }
2255
2256                 if (bt != -1) {
2257                     if (REG_GPR & ~bx)
2258                         return NULL; /* Invalid register */
2259                     if (~sok & bx & SIZE_MASK)
2260                         return NULL; /* Invalid size */
2261                     sok &= bx;
2262                 }
2263
2264                 /* While we're here, ensure the user didn't specify
2265                    WORD or QWORD. */
2266                 if (input->disp_size == 16 || input->disp_size == 64)
2267                     return NULL;
2268
                     /* The register sizes still allowed by sok must agree
                      * with the requested address size. */
2269                 if (addrbits == 16 ||
2270                     (addrbits == 32 && !(sok & BITS32)) ||
2271                     (addrbits == 64 && !(sok & BITS64)))
2272                     return NULL;
2273
2274                 /* now reorganize base/index */
2275                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2276                     ((hb == b && ht == EAH_NOTBASE)
2277                      || (hb == i && ht == EAH_MAKEBASE))) {
2278                     /* swap if hints say so */
2279                     t = bt, bt = it, it = t;
2280                     t = bx, bx = ix, ix = t;
2281                 }
2282                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2283                     bt = -1, bx = 0, s++;
2284                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2285                     /* make single reg base, unless hint */
2286                     bt = it, bx = ix, it = -1, ix = 0;
2287                 }
2288                 if (((s == 2 && it != REG_NUM_ESP
2289                       && !(input->eaflags & EAF_TIMESTWO)) || s == 3
2290                      || s == 5 || s == 9) && bt == -1)
2291                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2292                 if (it == -1 && (bt & 7) != REG_NUM_ESP
2293                     && (input->eaflags & EAF_TIMESTWO))
2294                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2295                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2296                 if (s == 1 && it == REG_NUM_ESP) {
2297                     /* swap ESP into base if scale is 1 */
2298                     t = it, it = bt, bt = t;
2299                     t = ix, ix = bx, bx = t;
2300                 }
2301                 if (it == REG_NUM_ESP
2302                     || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2303                     return NULL;        /* wrong, for various reasons */
2304
2305                 output->rex |= rexflags(it, ix, REX_X);
2306                 output->rex |= rexflags(bt, bx, REX_B);
2307
2308                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2309                     /* no SIB needed */
2310                     int mod, rm;
2311
                         /* mod picks the displacement width: 0 = none,
                          * 1 = one byte, 2 = four bytes.  With no base
                          * register, mod is 0 but four bytes are still
                          * emitted (see output->bytes below). */
2312                     if (bt == -1) {
2313                         rm = 5;
2314                         mod = 0;
2315                     } else {
2316                         rm = (bt & 7);
2317                         if (rm != REG_NUM_EBP && o == 0 &&
2318                                 seg == NO_SEG && !forw_ref &&
2319                                 !(input->eaflags &
2320                                   (EAF_BYTEOFFS | EAF_WORDOFFS)))
2321                             mod = 0;
2322                         else if (input->eaflags & EAF_BYTEOFFS ||
2323                                  (o >= -128 && o <= 127 && seg == NO_SEG
2324                                   && !forw_ref
2325                                   && !(input->eaflags & EAF_WORDOFFS)))
2326                             mod = 1;
2327                         else
2328                             mod = 2;
2329                     }
2330
2331                     output->sib_present = false;
2332                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2333                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2334                 } else {
2335                     /* we need a SIB */
2336                     int mod, scale, index, base;
2337
2338                     if (it == -1)
2339                         index = 4, s = 1;
2340                     else
2341                         index = (it & 7);
2342
                         /* Convert the multiplier 1/2/4/8 into the 2-bit
                          * value stored in the top bits of the SIB byte. */
2343                     switch (s) {
2344                     case 1:
2345                         scale = 0;
2346                         break;
2347                     case 2:
2348                         scale = 1;
2349                         break;
2350                     case 4:
2351                         scale = 2;
2352                         break;
2353                     case 8:
2354                         scale = 3;
2355                         break;
2356                     default:   /* then what the smeg is it? */
2357                         return NULL;    /* panic */
2358                     }
2359
                         /* Same mod selection as the no-SIB case above. */
2360                     if (bt == -1) {
2361                         base = 5;
2362                         mod = 0;
2363                     } else {
2364                         base = (bt & 7);
2365                         if (base != REG_NUM_EBP && o == 0 &&
2366                                     seg == NO_SEG && !forw_ref &&
2367                                     !(input->eaflags &
2368                                       (EAF_BYTEOFFS | EAF_WORDOFFS)))
2369                             mod = 0;
2370                         else if (input->eaflags & EAF_BYTEOFFS ||
2371                                  (o >= -128 && o <= 127 && seg == NO_SEG
2372                                   && !forw_ref
2373                                   && !(input->eaflags & EAF_WORDOFFS)))
2374                             mod = 1;
2375                         else
2376                             mod = 2;
2377                     }
2378
2379                     output->sib_present = true;
2380                     output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
2381                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2382                     output->sib = (scale << 6) | (index << 3) | base;
2383                 }
2384             } else {            /* it's 16-bit */
2385                 int mod, rm;
2386
2387                 /* check for 64-bit long mode */
2388                 if (addrbits == 64)
2389                     return NULL;
2390
2391                 /* check all registers are BX, BP, SI or DI */
2392                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
2393                      && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
2394                                        && i != R_SI && i != R_DI))
2395                     return NULL;
2396
2397                 /* ensure the user didn't specify DWORD/QWORD */
2398                 if (input->disp_size == 32 || input->disp_size == 64)
2399                     return NULL;
2400
2401                 if (s != 1 && i != -1)
2402                     return NULL;        /* no can do, in 16-bit EA */
2403                 if (b == -1 && i != -1) {
2404                     int tmp = b;
2405                     b = i;
2406                     i = tmp;
2407                 }               /* swap */
2408                 if ((b == R_SI || b == R_DI) && i != -1) {
2409                     int tmp = b;
2410                     b = i;
2411                     i = tmp;
2412                 }
2413                 /* have BX/BP as base, SI/DI index */
2414                 if (b == i)
2415                     return NULL;        /* shouldn't ever happen, in theory */
2416                 if (i != -1 && b != -1 &&
2417                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2418                     return NULL;        /* invalid combinations */
2419                 if (b == -1)    /* pure offset: handled above */
2420                     return NULL;        /* so if it gets to here, panic! */
2421
                     /* Pick the r/m value for the base/index pair (0..3)
                      * or for the single base register (4..7); rm stays -1
                      * if no case matches. */
2422                 rm = -1;
2423                 if (i != -1)
2424                     switch (i * 256 + b) {
2425                     case R_SI * 256 + R_BX:
2426                         rm = 0;
2427                         break;
2428                     case R_DI * 256 + R_BX:
2429                         rm = 1;
2430                         break;
2431                     case R_SI * 256 + R_BP:
2432                         rm = 2;
2433                         break;
2434                     case R_DI * 256 + R_BP:
2435                         rm = 3;
2436                         break;
2437                 } else
2438                     switch (b) {
2439                     case R_SI:
2440                         rm = 4;
2441                         break;
2442                     case R_DI:
2443                         rm = 5;
2444                         break;
2445                     case R_BP:
2446                         rm = 6;
2447                         break;
2448                     case R_BX:
2449                         rm = 7;
2450                         break;
2451                     }
2452                 if (rm == -1)   /* can't happen, in theory */
2453                     return NULL;        /* so panic if it does */
2454
                     /* In this 16-bit form mod doubles as the number of
                      * displacement bytes (0, 1 or 2); rm == 6 never takes
                      * the zero-displacement form. */
2455                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2456                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2457                     mod = 0;
2458                 else if (input->eaflags & EAF_BYTEOFFS ||
2459                          (o >= -128 && o <= 127 && seg == NO_SEG
2460                           && !forw_ref
2461                           && !(input->eaflags & EAF_WORDOFFS)))
2462                     mod = 1;
2463                 else
2464                     mod = 2;
2465
2466                 output->sib_present = false;    /* no SIB - it's 16-bit */
2467                 output->bytes = mod;    /* bytes of offset needed */
2468                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2469             }
2470         }
2471     }
2472
     /* Total encoded length: ModRM byte + optional SIB + displacement. */
2473     output->size = 1 + output->sib_present + output->bytes;
2474     return output;
2475 }
2476
2477 static void add_asp(insn *ins, int addrbits)
2478 {
2479     int j, valid;
2480     int defdisp;
2481
2482     valid = (addrbits == 64) ? 64|32 : 32|16;
2483
2484     switch (ins->prefixes[PPS_ASIZE]) {
2485     case P_A16:
2486         valid &= 16;
2487         break;
2488     case P_A32:
2489         valid &= 32;
2490         break;
2491     case P_A64:
2492         valid &= 64;
2493         break;
2494     case P_ASP:
2495         valid &= (addrbits == 32) ? 16 : 32;
2496         break;
2497     default:
2498         break;
2499     }
2500
2501     for (j = 0; j < ins->operands; j++) {
2502         if (!(MEMORY & ~ins->oprs[j].type)) {
2503             int32_t i, b;
2504
2505             /* Verify as Register */
2506             if (ins->oprs[j].indexreg < EXPR_REG_START
2507                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2508                 i = 0;
2509             else
2510                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2511
2512             /* Verify as Register */
2513             if (ins->oprs[j].basereg < EXPR_REG_START
2514                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2515                 b = 0;
2516             else
2517                 b = nasm_reg_flags[ins->oprs[j].basereg];
2518
2519             if (ins->oprs[j].scale == 0)
2520                 i = 0;
2521
2522             if (!i && !b) {
2523                 int ds = ins->oprs[j].disp_size;
2524                 if ((addrbits != 64 && ds > 8) ||
2525                     (addrbits == 64 && ds == 16))
2526                     valid &= ds;
2527             } else {
2528                 if (!(REG16 & ~b))
2529                     valid &= 16;
2530                 if (!(REG32 & ~b))
2531                     valid &= 32;
2532                 if (!(REG64 & ~b))
2533                     valid &= 64;
2534
2535                 if (!(REG16 & ~i))
2536                     valid &= 16;
2537                 if (!(REG32 & ~i))
2538                     valid &= 32;
2539                 if (!(REG64 & ~i))
2540                     valid &= 64;
2541             }
2542         }
2543     }
2544
2545     if (valid & addrbits) {
2546         ins->addr_size = addrbits;
2547     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2548         /* Add an address size prefix */
2549         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2550         ins->prefixes[PPS_ASIZE] = pref;
2551         ins->addr_size = (addrbits == 32) ? 16 : 32;
2552     } else {
2553         /* Impossible... */
2554         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2555         ins->addr_size = addrbits; /* Error recovery */
2556     }
2557
2558     defdisp = ins->addr_size == 16 ? 16 : 32;
2559
2560     for (j = 0; j < ins->operands; j++) {
2561         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2562             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2563             != ins->addr_size) {
2564             /* mem_offs sizes must match the address size; if not,
2565                strip the MEM_OFFS bit and match only EA instructions */
2566             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2567         }
2568     }
2569 }