assemble.c

   1 /* assemble.c   code generation for the Netwide Assembler
   2  *
   3  * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
   4  * Julian Hall. All rights reserved. The software is
   5  * redistributable under the licence given in the file "Licence"
   6  * distributed in the NASM archive.
   7  *
   8  * the actual codes (C syntax, i.e. octal):
   9  * \0            - terminates the code. (Unless it's a literal of course.)
  10  * \1, \2, \3    - that many literal bytes follow in the code stream
  11  * \4, \6        - the POP/PUSH (respectively) codes for CS, DS, ES, SS
  12  *                 (POP is never used for CS) depending on operand 0
  13  * \5, \7        - the second byte of POP/PUSH codes for FS, GS, depending
  14  *                 on operand 0
  15  * \10..\13      - a literal byte follows in the code stream, to be added
  16  *                 to the register value of operand 0..3
  17  * \14..\17      - a signed byte immediate operand, from operand 0..3
  18  * \20..\23      - a byte immediate operand, from operand 0..3
  19  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  20  * \30..\33      - a word immediate operand, from operand 0..3
  21  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  22  *                 assembly mode or the operand-size override on the operand
  23  * \40..\43      - a long immediate operand, from operand 0..3
  24  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  25  *                 depending on the address size of the instruction.
  26  * \50..\53      - a byte relative operand, from operand 0..3
  27  * \54..\57      - a qword immediate operand, from operand 0..3
  28  * \60..\63      - a word relative operand, from operand 0..3
  29  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  30  *                 assembly mode or the operand-size override on the operand
  31  * \70..\73      - a long relative operand, from operand 0..3
  32  * \74..\77       - a word constant, from the _segment_ part of operand 0..3
  33  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  34  *                 field the register value of operand b.
  35  * \140..\143    - an immediate word or signed byte for operand 0..3
  36  * \144..\147    - or 2 (s-field) into next opcode byte if operand 0..3
  37  *                  is a signed byte rather than a word.
  38  * \150..\153     - an immediate dword or signed byte for operand 0..3
  39  * \154..\157     - or 2 (s-field) into next opcode byte if operand 0..3
  40  *                  is a signed byte rather than a dword.
  41  * \160..\163    - this instruction uses DREX rather than REX, with the
  42  *                 OC0 field set to 0, and the dest field taken from
  43  *                 operand 0..3.
  44  * \164..\167    - this instruction uses DREX rather than REX, with the
  45  *                 OC0 field set to 1, and the dest field taken from
  46  *                 operand 0..3.
  47  * \170          - encodes the literal byte 0. (Some compilers don't take
  48  *                 kindly to a zero byte in the _middle_ of a compile time
  49  *                 string constant, so I had to put this hack in.)
  50  * \171          - placement of DREX suffix in the absence of an EA
  51  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  52  *                 field equal to digit b.
  53  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  54  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  55  * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
  56  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  57  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
  58  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
  59  * \322          - indicates that this instruction is only valid when the
  60  *                 operand size is the default (instruction to disassembler,
  61  *                 generates no code in the assembler)
  62  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
  63  * \324          - indicates 64-bit operand size requiring REX prefix.
  64  * \330          - a literal byte follows in the code stream, to be added
  65  *                 to the condition code value of the instruction.
  66  * \331          - instruction not valid with REP prefix.  Hint for
  67  *                 disassembler only; for SSE instructions.
  68  * \332          - REP prefix (0xF2 byte) used as opcode extension.
  69  * \333          - REP prefix (0xF3 byte) used as opcode extension.
  70  * \334          - LOCK prefix used instead of REX.R
  71  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  72  * \340          - reserve <operand 0> bytes of uninitialized storage.
  73  *                 Operand 0 had better be a segmentless constant.
  74  * \364          - operand-size prefix (0x66) not permitted
  75  * \365          - address-size prefix (0x67) not permitted
  76  * \366          - operand-size prefix (0x66) used as opcode extension
  77  * \367          - address-size prefix (0x67) used as opcode extension
  78  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
  79  *                 370 is used for Jcc, 371 is used for JMP.
  80  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
  81  *                 used for conditional jump over longer jump
  82  */
  83
  84 #include "compiler.h"
  85
  86 #include <stdio.h>
  87 #include <string.h>
  88 #include <inttypes.h>
  89
  90 #include "nasm.h"
  91 #include "nasmlib.h"
  92 #include "assemble.h"
  93 #include "insns.h"
  94 #include "preproc.h"
  95 #include "regflags.c"
  96 #include "regvals.c"
  97
  98 typedef struct {
  99     int sib_present;                 /* is a SIB byte necessary? */
 100     int bytes;                       /* # of bytes of offset needed */
 101     int size;                        /* lazy - this is sib+bytes+1 */
 102     uint8_t modrm, sib, rex, rip;    /* the bytes themselves */
 103 } ea;
 104
 105 static uint32_t cpu;            /* cpu level received from nasm.c */
 106 static efunc errfunc;
 107 static struct ofmt *outfmt;
 108 static ListGen *list;
 109
 110 static int32_t calcsize(int32_t, int32_t, int, insn *, const char *);
 111 static void gencode(int32_t, int32_t, int, insn *, const char *, int32_t);
 112 static int matches(const struct itemplate *, insn *, int bits);
 113 static int32_t regflag(const operand *);
 114 static int32_t regval(const operand *);
 115 static int rexflags(int, int32_t, int);
 116 static int op_rexflags(const operand *, int);
 117 static ea *process_ea(operand *, ea *, int, int, int, int32_t, int);
 118 static void add_asp(insn *, int);
 119
 120 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 121 {
 122     return ins->prefixes[pos] == prefix;
 123 }
 124
 125 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 126 {
 127     if (ins->prefixes[pos])
 128         errfunc(ERR_NONFATAL, "invalid %s prefix",
 129                 prefix_name(ins->prefixes[pos]));
 130 }
 131
 132 static const char *size_name(int size)
 133 {
 134     switch (size) {
 135     case 1:
 136         return "byte";
 137     case 2:
 138         return "word";
 139     case 4:
 140         return "dword";
 141     case 8:
 142         return "qword";
 143     case 10:
 144         return "tword";
 145     case 16:
 146         return "oword";
 147     default:
 148         return "???";
 149     }
 150 }
 151
 152 static void warn_overflow(int size, int64_t data)
 153 {
 154     if (size < 8) {
 155         int64_t lim = (1 << (size*8))-1;
 156
 157         if (data < ~lim || data > lim)
 158             errfunc(ERR_WARNING, "%s data exceeds bounds", size_name(size));
 159     }
 160 }
 161 /*
 162  * This routine wrappers the real output format's output routine,
 163  * in order to pass a copy of the data off to the listing file
 164  * generator at the same time.
 165  */
 166 static void out(int32_t offset, int32_t segto, const void *data,
 167                 uint32_t type, int32_t segment, int32_t wrt)
 168 {
 169     static int32_t lineno = 0;     /* static!!! */
 170     static char *lnfname = NULL;
 171
 172     if ((type & OUT_TYPMASK) == OUT_ADDRESS) {
 173         if (segment != NO_SEG || wrt != NO_SEG) {
 174             /*
 175              * This address is relocated. We must write it as
 176              * OUT_ADDRESS, so there's no work to be done here.
 177              */
 178             list->output(offset, data, type);
 179         } else {
 180             uint8_t p[8], *q = p;
 181             /*
 182              * This is a non-relocated address, and we're going to
 183              * convert it into RAWDATA format.
 184              */
 185             if ((type & OUT_SIZMASK) == 4) {
 186                 WRITELONG(q, *(int32_t *)data);
 187                 list->output(offset, p, OUT_RAWDATA + 4);
 188             } else if ((type & OUT_SIZMASK) == 8) {
 189                 WRITEDLONG(q, *(int64_t *)data);
 190                 list->output(offset, p, OUT_RAWDATA + 8);
 191             } else {
 192                 WRITESHORT(q, *(int32_t *)data);
 193                 list->output(offset, p, OUT_RAWDATA + 2);
 194             }
 195         }
 196     } else if ((type & OUT_TYPMASK) == OUT_RAWDATA) {
 197         list->output(offset, data, type);
 198     } else if ((type & OUT_TYPMASK) == OUT_RESERVE) {
 199         list->output(offset, NULL, type);
 200     } else if ((type & OUT_TYPMASK) == OUT_REL2ADR ||
 201                (type & OUT_TYPMASK) == OUT_REL4ADR) {
 202         list->output(offset, data, type);
 203     }
 204
 205     /*
 206      * this call to src_get determines when we call the
 207      * debug-format-specific "linenum" function
 208      * it updates lineno and lnfname to the current values
 209      * returning 0 if "same as last time", -2 if lnfname
 210      * changed, and the amount by which lineno changed,
 211      * if it did. thus, these variables must be static
 212      */
 213
 214     if (src_get(&lineno, &lnfname)) {
 215         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 216     }
 217
 218     outfmt->output(segto, data, type, segment, wrt);
 219 }
 220
 221 static int jmp_match(int32_t segment, int32_t offset, int bits,
 222                      insn * ins, const char *code)
 223 {
 224     int32_t isize;
 225     uint8_t c = code[0];
 226
 227     if (c != 0370 && c != 0371)
 228         return 0;
 229     if (ins->oprs[0].opflags & OPFLAG_FORWARD) {
 230         if ((optimizing < 0 || (ins->oprs[0].type & STRICT))
 231             && c == 0370)
 232             return 1;
 233         else
 234             return (pass0 == 0);        /* match a forward reference */
 235     }
 236     isize = calcsize(segment, offset, bits, ins, code);
 237     if (ins->oprs[0].segment != segment)
 238         return 0;
 239     isize = ins->oprs[0].offset - offset - isize;       /* isize is now the delta */
 240     if (isize >= -128L && isize <= 127L)
 241         return 1;               /* it is byte size */
 242
 243     return 0;
 244 }
 245
 246 int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp,
 247               insn * instruction, struct ofmt *output, efunc error,
 248               ListGen * listgen)
 249 {
 250     const struct itemplate *temp;
 251     int j;
 252     int size_prob;
 253     int32_t insn_end;
 254     int32_t itimes;
 255     int32_t start = offset;
 256     int32_t wsize = 0;             /* size for DB etc. */
 257
 258     errfunc = error;            /* to pass to other functions */
 259     cpu = cp;
 260     outfmt = output;            /* likewise */
 261     list = listgen;             /* and again */
 262
 263     switch (instruction->opcode) {
 264     case -1:
 265         return 0;
 266     case I_DB:
 267         wsize = 1;
 268         break;
 269     case I_DW:
 270         wsize = 2;
 271         break;
 272     case I_DD:
 273         wsize = 4;
 274         break;
 275     case I_DQ:
 276         wsize = 8;
 277         break;
 278     case I_DT:
 279         wsize = 10;
 280         break;
 281     case I_DO:
 282         wsize = 16;
 283         break;
 284     default:
 285         break;
 286     }
 287
 288     if (wsize) {
 289         extop *e;
 290         int32_t t = instruction->times;
 291         if (t < 0)
 292             errfunc(ERR_PANIC,
 293                     "instruction->times < 0 (%ld) in assemble()", t);
 294
 295         while (t--) {           /* repeat TIMES times */
 296             for (e = instruction->eops; e; e = e->next) {
 297                 if (e->type == EOT_DB_NUMBER) {
 298                     if (wsize == 1) {
 299                         if (e->segment != NO_SEG)
 300                             errfunc(ERR_NONFATAL,
 301                                     "one-byte relocation attempted");
 302                         else {
 303                             uint8_t out_byte = e->offset;
 304                             out(offset, segment, &out_byte,
 305                                 OUT_RAWDATA + 1, NO_SEG, NO_SEG);
 306                         }
 307                     } else if (wsize > 8) {
 308                         errfunc(ERR_NONFATAL, "integer supplied to a DT or DO"
 309                                 " instruction");
 310                     } else
 311                         out(offset, segment, &e->offset,
 312                             OUT_ADDRESS + wsize, e->segment, e->wrt);
 313                     offset += wsize;
 314                 } else if (e->type == EOT_DB_STRING) {
 315                     int align;
 316
 317                     out(offset, segment, e->stringval,
 318                         OUT_RAWDATA + e->stringlen, NO_SEG, NO_SEG);
 319                     align = e->stringlen % wsize;
 320
 321                     if (align) {
 322                         align = wsize - align;
 323                         out(offset, segment, "\0\0\0\0\0\0\0\0",
 324                             OUT_RAWDATA + align, NO_SEG, NO_SEG);
 325                     }
 326                     offset += e->stringlen + align;
 327                 }
 328             }
 329             if (t > 0 && t == instruction->times - 1) {
 330                 /*
 331                  * Dummy call to list->output to give the offset to the
 332                  * listing module.
 333                  */
 334                 list->output(offset, NULL, OUT_RAWDATA);
 335                 list->uplevel(LIST_TIMES);
 336             }
 337         }
 338         if (instruction->times > 1)
 339             list->downlevel(LIST_TIMES);
 340         return offset - start;
 341     }
 342
 343     if (instruction->opcode == I_INCBIN) {
 344         static char fname[FILENAME_MAX];
 345         FILE *fp;
 346         int32_t len;
 347         char *prefix = "", *combine;
 348         char **pPrevPath = NULL;
 349
 350         len = FILENAME_MAX - 1;
 351         if (len > instruction->eops->stringlen)
 352             len = instruction->eops->stringlen;
 353         strncpy(fname, instruction->eops->stringval, len);
 354         fname[len] = '\0';
 355
 356         while (1) {         /* added by alexfru: 'incbin' uses include paths */
 357             combine = nasm_malloc(strlen(prefix) + len + 1);
 358             strcpy(combine, prefix);
 359             strcat(combine, fname);
 360
 361             if ((fp = fopen(combine, "rb")) != NULL) {
 362                 nasm_free(combine);
 363                 break;
 364             }
 365
 366             nasm_free(combine);
 367             pPrevPath = pp_get_include_path_ptr(pPrevPath);
 368             if (pPrevPath == NULL)
 369                 break;
 370             prefix = *pPrevPath;
 371         }
 372
 373         if (fp == NULL)
 374             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 375                   fname);
 376         else if (fseek(fp, 0L, SEEK_END) < 0)
 377             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 378                   fname);
 379         else {
 380             static char buf[2048];
 381             int32_t t = instruction->times;
 382             int32_t base = 0;
 383
 384             len = ftell(fp);
 385             if (instruction->eops->next) {
 386                 base = instruction->eops->next->offset;
 387                 len -= base;
 388                 if (instruction->eops->next->next &&
 389                     len > instruction->eops->next->next->offset)
 390                     len = instruction->eops->next->next->offset;
 391             }
 392             /*
 393              * Dummy call to list->output to give the offset to the
 394              * listing module.
 395              */
 396             list->output(offset, NULL, OUT_RAWDATA);
 397             list->uplevel(LIST_INCBIN);
 398             while (t--) {
 399                 int32_t l;
 400
 401                 fseek(fp, base, SEEK_SET);
 402                 l = len;
 403                 while (l > 0) {
 404                     int32_t m =
 405                         fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
 406                               fp);
 407                     if (!m) {
 408                         /*
 409                          * This shouldn't happen unless the file
 410                          * actually changes while we are reading
 411                          * it.
 412                          */
 413                         error(ERR_NONFATAL,
 414                               "`incbin': unexpected EOF while"
 415                               " reading file `%s'", fname);
 416                         t = 0;  /* Try to exit cleanly */
 417                         break;
 418                     }
 419                     out(offset, segment, buf, OUT_RAWDATA + m,
 420                         NO_SEG, NO_SEG);
 421                     l -= m;
 422                 }
 423             }
 424             list->downlevel(LIST_INCBIN);
 425             if (instruction->times > 1) {
 426                 /*
 427                  * Dummy call to list->output to give the offset to the
 428                  * listing module.
 429                  */
 430                 list->output(offset, NULL, OUT_RAWDATA);
 431                 list->uplevel(LIST_TIMES);
 432                 list->downlevel(LIST_TIMES);
 433             }
 434             fclose(fp);
 435             return instruction->times * len;
 436         }
 437         return 0;               /* if we're here, there's an error */
 438     }
 439
 440     /* Check to see if we need an address-size prefix */
 441     add_asp(instruction, bits);
 442
 443     size_prob = false;
 444
 445     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++){
 446         int m = matches(temp, instruction, bits);
 447
 448         if (m == 99)
 449             m += jmp_match(segment, offset, bits, instruction, temp->code);
 450
 451         if (m == 100) {         /* matches! */
 452             const char *codes = temp->code;
 453             int32_t insn_size = calcsize(segment, offset, bits,
 454                                       instruction, codes);
 455             itimes = instruction->times;
 456             if (insn_size < 0)  /* shouldn't be, on pass two */
 457                 error(ERR_PANIC, "errors made it through from pass one");
 458             else
 459                 while (itimes--) {
 460                     for (j = 0; j < MAXPREFIX; j++) {
 461                         uint8_t c = 0;
 462                         switch (instruction->prefixes[j]) {
 463                         case P_LOCK:
 464                             c = 0xF0;
 465                             break;
 466                         case P_REPNE:
 467                         case P_REPNZ:
 468                             c = 0xF2;
 469                             break;
 470                         case P_REPE:
 471                         case P_REPZ:
 472                         case P_REP:
 473                             c = 0xF3;
 474                             break;
 475                         case R_CS:
 476                             if (bits == 64) {
 477                                 error(ERR_WARNING,
 478                                       "cs segment base ignored in 64-bit mode");
 479                             }
 480                             c = 0x2E;
 481                             break;
 482                         case R_DS:
 483                             if (bits == 64) {
 484                                 error(ERR_WARNING,
 485                                       "ds segment base ignored in 64-bit mode");
 486                             }
 487                             c = 0x3E;
 488                             break;
 489                         case R_ES:
 490                            if (bits == 64) {
 491                                 error(ERR_WARNING,
 492                                       "es segment base ignored in 64-bit mode");
 493                            }
 494                             c = 0x26;
 495                             break;
 496                         case R_FS:
 497                             c = 0x64;
 498                             break;
 499                         case R_GS:
 500                             c = 0x65;
 501                             break;
 502                         case R_SS:
 503                             if (bits == 64) {
 504                                 error(ERR_WARNING,
 505                                       "ss segment base ignored in 64-bit mode");
 506                             }
 507                             c = 0x36;
 508                             break;
 509                         case R_SEGR6:
 510                         case R_SEGR7:
 511                             error(ERR_NONFATAL,
 512                                   "segr6 and segr7 cannot be used as prefixes");
 513                             break;
 514                         case P_A16:
 515                             if (bits == 64) {
 516                                 error(ERR_NONFATAL,
 517                                       "16-bit addressing is not supported "
 518                                       "in 64-bit mode");
 519                             } else if (bits != 16)
 520                                 c = 0x67;
 521                             break;
 522                         case P_A32:
 523                             if (bits != 32)
 524                                 c = 0x67;
 525                             break;
 526                         case P_A64:
 527                             if (bits != 64) {
 528                                 error(ERR_NONFATAL,
 529                                       "64-bit addressing is only supported "
 530                                       "in 64-bit mode");
 531                             }
 532                             break;
 533                         case P_ASP:
 534                             c = 0x67;
 535                             break;
 536                         case P_O16:
 537                             if (bits != 16)
 538                                 c = 0x66;
 539                             break;
 540                         case P_O32:
 541                             if (bits == 16)
 542                                 c = 0x66;
 543                             break;
 544                         case P_O64:
 545                             /* REX.W */
 546                             break;
 547                         case P_OSP:
 548                             c = 0x66;
 549                             break;
 550                         case P_none:
 551                             break;
 552                         default:
 553                             error(ERR_PANIC, "invalid instruction prefix");
 554                         }
 555                         if (c != 0) {
 556                             out(offset, segment, &c, OUT_RAWDATA + 1,
 557                                 NO_SEG, NO_SEG);
 558                             offset++;
 559                         }
 560                     }
 561                     insn_end = offset + insn_size;
 562                     gencode(segment, offset, bits, instruction, codes,
 563                             insn_end);
 564                     offset += insn_size;
 565                     if (itimes > 0 && itimes == instruction->times - 1) {
 566                         /*
 567                          * Dummy call to list->output to give the offset to the
 568                          * listing module.
 569                          */
 570                         list->output(offset, NULL, OUT_RAWDATA);
 571                         list->uplevel(LIST_TIMES);
 572                     }
 573                 }
 574             if (instruction->times > 1)
 575                 list->downlevel(LIST_TIMES);
 576             return offset - start;
 577         } else if (m > 0 && m > size_prob) {
 578             size_prob = m;
 579         }
 580 //        temp++;
 581     }
 582
 583     if (temp->opcode == -1) {   /* didn't match any instruction */
 584         switch (size_prob) {
 585         case 1:
 586             error(ERR_NONFATAL, "operation size not specified");
 587             break;
 588         case 2:
 589             error(ERR_NONFATAL, "mismatch in operand sizes");
 590             break;
 591         case 3:
 592             error(ERR_NONFATAL, "no instruction for this cpu level");
 593             break;
 594         case 4:
 595             error(ERR_NONFATAL, "instruction not supported in 64-bit mode");
 596             break;
 597         default:
 598             error(ERR_NONFATAL,
 599                   "invalid combination of opcode and operands");
 600             break;
 601         }
 602     }
 603     return 0;
 604 }
 605
 606 int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp,
 607                insn * instruction, efunc error)
 608 {
 609     const struct itemplate *temp;
 610
 611     errfunc = error;            /* to pass to other functions */
 612     cpu = cp;
 613
 614     if (instruction->opcode == -1)
 615         return 0;
 616
 617     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 618         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 619         instruction->opcode == I_DT || instruction->opcode == I_DO) {
 620         extop *e;
 621         int32_t isize, osize, wsize = 0;   /* placate gcc */
 622
 623         isize = 0;
 624         switch (instruction->opcode) {
 625         case I_DB:
 626             wsize = 1;
 627             break;
 628         case I_DW:
 629             wsize = 2;
 630             break;
 631         case I_DD:
 632             wsize = 4;
 633             break;
 634         case I_DQ:
 635             wsize = 8;
 636             break;
 637         case I_DT:
 638             wsize = 10;
 639             break;
 640         case I_DO:
 641             wsize = 16;
 642             break;
 643         default:
 644             break;
 645         }
 646
 647         for (e = instruction->eops; e; e = e->next) {
 648             int32_t align;
 649
 650             osize = 0;
 651             if (e->type == EOT_DB_NUMBER)
 652                 osize = 1;
 653             else if (e->type == EOT_DB_STRING)
 654                 osize = e->stringlen;
 655
 656             align = (-osize) % wsize;
 657             if (align < 0)
 658                 align += wsize;
 659             isize += osize + align;
 660         }
 661         return isize * instruction->times;
 662     }
 663
 664     if (instruction->opcode == I_INCBIN) {
 665         char fname[FILENAME_MAX];
 666         FILE *fp;
 667         int32_t len;
 668         char *prefix = "", *combine;
 669         char **pPrevPath = NULL;
 670
 671         len = FILENAME_MAX - 1;
 672         if (len > instruction->eops->stringlen)
 673             len = instruction->eops->stringlen;
 674         strncpy(fname, instruction->eops->stringval, len);
 675         fname[len] = '\0';
 676
 677         /* added by alexfru: 'incbin' uses include paths */
 678         while (1) {
 679             combine = nasm_malloc(strlen(prefix) + len + 1);
 680             strcpy(combine, prefix);
 681             strcat(combine, fname);
 682
 683             if ((fp = fopen(combine, "rb")) != NULL) {
 684                 nasm_free(combine);
 685                 break;
 686             }
 687
 688             nasm_free(combine);
 689             pPrevPath = pp_get_include_path_ptr(pPrevPath);
 690             if (pPrevPath == NULL)
 691                 break;
 692             prefix = *pPrevPath;
 693         }
 694
 695         if (fp == NULL)
 696             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 697                   fname);
 698         else if (fseek(fp, 0L, SEEK_END) < 0)
 699             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 700                   fname);
 701         else {
 702             len = ftell(fp);
 703             fclose(fp);
 704             if (instruction->eops->next) {
 705                 len -= instruction->eops->next->offset;
 706                 if (instruction->eops->next->next &&
 707                     len > instruction->eops->next->next->offset) {
 708                     len = instruction->eops->next->next->offset;
 709                 }
 710             }
 711             return instruction->times * len;
 712         }
 713         return 0;               /* if we're here, there's an error */
 714     }
 715
 716     /* Check to see if we need an address-size prefix */
 717     add_asp(instruction, bits);
 718
 719     for (temp = nasm_instructions[instruction->opcode]; temp->opcode != -1; temp++) {
 720         int m = matches(temp, instruction, bits);
 721         if (m == 99)
 722             m += jmp_match(segment, offset, bits, instruction, temp->code);
 723
 724         if (m == 100) {
 725             /* we've matched an instruction. */
 726             int32_t isize;
 727             const char *codes = temp->code;
 728             int j;
 729
 730             isize = calcsize(segment, offset, bits, instruction, codes);
 731             if (isize < 0)
 732                 return -1;
 733             for (j = 0; j < MAXPREFIX; j++) {
 734                 switch (instruction->prefixes[j]) {
 735                 case P_A16:
 736                     if (bits != 16)
 737                         isize++;
 738                     break;
 739                 case P_A32:
 740                     if (bits != 32)
 741                         isize++;
 742                     break;
 743                 case P_O16:
 744                     if (bits != 16)
 745                         isize++;
 746                     break;
 747                 case P_O32:
 748                     if (bits == 16)
 749                         isize++;
 750                     break;
 751                 case P_A64:
 752                 case P_O64:
 753                 case P_none:
 754                     break;
 755                 default:
 756                     isize++;
 757                     break;
 758                 }
 759             }
 760             return isize * instruction->times;
 761         }
 762     }
 763     return -1;                  /* didn't match any instruction */
 764 }
 765
 766 /* check that  opn[op]  is a signed byte of size 16 or 32,
 767                                         and return the signed value*/
 768 static int is_sbyte(insn * ins, int op, int size)
 769 {
 770     int32_t v;
 771     int ret;
 772
 773     ret = !(ins->forw_ref && ins->oprs[op].opflags) &&  /* dead in the water on forward reference or External */
 774         optimizing >= 0 &&
 775         !(ins->oprs[op].type & STRICT) &&
 776         ins->oprs[op].wrt == NO_SEG && ins->oprs[op].segment == NO_SEG;
 777
 778     v = ins->oprs[op].offset;
 779     if (size == 16)
 780         v = (int16_t)v;    /* sign extend if 16 bits */
 781
 782     return ret && v >= -128L && v <= 127L;
 783 }
 784
 785 static int32_t calcsize(int32_t segment, int32_t offset, int bits,
 786                      insn * ins, const char *codes)
 787 {
 788     int32_t length = 0;
 789     uint8_t c;
 790     int rex_mask = ~0;
 791     ins->rex = 0;               /* Ensure REX is reset */
 792
 793     if (ins->prefixes[PPS_OSIZE] == P_O64)
 794         ins->rex |= REX_W;
 795
 796     (void)segment;              /* Don't warn that this parameter is unused */
 797     (void)offset;               /* Don't warn that this parameter is unused */
 798
 799     while (*codes)
 800         switch (c = *codes++) {
 801         case 01:
 802         case 02:
 803         case 03:
 804             codes += c, length += c;
 805             break;
 806         case 04:
 807         case 05:
 808         case 06:
 809         case 07:
 810             length++;
 811             break;
 812         case 010:
 813         case 011:
 814         case 012:
 815         case 013:
 816             ins->rex |=
 817                 op_rexflags(&ins->oprs[c - 010], REX_B|REX_H|REX_P|REX_W);
 818             codes++, length++;
 819             break;
 820         case 014:
 821         case 015:
 822         case 016:
 823         case 017:
 824             length++;
 825             break;
 826         case 020:
 827         case 021:
 828         case 022:
 829         case 023:
 830             length++;
 831             break;
 832         case 024:
 833         case 025:
 834         case 026:
 835         case 027:
 836             length++;
 837             break;
 838         case 030:
 839         case 031:
 840         case 032:
 841         case 033:
 842             length += 2;
 843             break;
 844         case 034:
 845         case 035:
 846         case 036:
 847         case 037:
 848             if (ins->oprs[c - 034].type & (BITS16 | BITS32 | BITS64))
 849                 length += (ins->oprs[c - 034].type & BITS16) ? 2 : 4;
 850             else
 851                 length += (bits == 16) ? 2 : 4;
 852             break;
 853         case 040:
 854         case 041:
 855         case 042:
 856         case 043:
 857             length += 4;
 858             break;
 859         case 044:
 860         case 045:
 861         case 046:
 862         case 047:
 863             length += ins->addr_size >> 3;
 864             break;
 865         case 050:
 866         case 051:
 867         case 052:
 868         case 053:
 869             length++;
 870             break;
 871         case 054:
 872         case 055:
 873         case 056:
 874         case 057:
 875             length += 8; /* MOV reg64/imm */
 876             break;
 877         case 060:
 878         case 061:
 879         case 062:
 880         case 063:
 881             length += 2;
 882             break;
 883         case 064:
 884         case 065:
 885         case 066:
 886         case 067:
 887             if (ins->oprs[c - 064].type & (BITS16 | BITS32 | BITS64))
 888                 length += (ins->oprs[c - 064].type & BITS16) ? 2 : 4;
 889             else
 890                 length += (bits == 16) ? 2 : 4;
 891             break;
 892         case 070:
 893         case 071:
 894         case 072:
 895         case 073:
 896             length += 4;
 897             break;
 898         case 074:
 899         case 075:
 900         case 076:
 901         case 077:
 902             length += 2;
 903             break;
 904         case 0140:
 905         case 0141:
 906         case 0142:
 907         case 0143:
 908             length += is_sbyte(ins, c - 0140, 16) ? 1 : 2;
 909             break;
 910         case 0144:
 911         case 0145:
 912         case 0146:
 913         case 0147:
 914             codes += 2;
 915             length++;
 916             break;
 917         case 0150:
 918         case 0151:
 919         case 0152:
 920         case 0153:
 921             length += is_sbyte(ins, c - 0150, 32) ? 1 : 4;
 922             break;
 923         case 0154:
 924         case 0155:
 925         case 0156:
 926         case 0157:
 927             codes += 2;
 928             length++;
 929             break;
 930         case 0160:
 931         case 0161:
 932         case 0162:
 933         case 0163:
 934             length++;
 935             ins->rex |= REX_D;
 936             ins->drexdst = regval(&ins->oprs[c & 3]);
 937             break;
 938         case 0164:
 939         case 0165:
 940         case 0166:
 941         case 0167:
 942             length++;
 943             ins->rex |= REX_D|REX_OC;
 944             ins->drexdst = regval(&ins->oprs[c & 3]);
 945             break;
 946         case 0170:
 947             length++;
 948             break;
 949         case 0171:
 950             break;
 951         case 0300:
 952         case 0301:
 953         case 0302:
 954         case 0303:
 955             break;
 956         case 0310:
 957             if (bits == 64)
 958                 return -1;
 959             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 960             break;
 961         case 0311:
 962             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 963             break;
 964         case 0312:
 965             break;
 966         case 0313:
 967             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 968                 has_prefix(ins, PPS_ASIZE, P_A32))
 969                 return -1;
 970             break;
 971         case 0320:
 972             length += (bits != 16);
 973             break;
 974         case 0321:
 975             length += (bits == 16);
 976             break;
 977         case 0322:
 978             break;
 979         case 0323:
 980             rex_mask &= ~REX_W;
 981             break;
 982         case 0324:
 983             ins->rex |= REX_W;
 984             break;
 985         case 0330:
 986             codes++, length++;
 987             break;
 988         case 0331:
 989             break;
 990         case 0332:
 991         case 0333:
 992             length++;
 993             break;
 994         case 0334:
 995             ins->rex |= REX_L;
 996             break;
 997         case 0335:
 998             break;
 999         case 0340:
1000         case 0341:
1001         case 0342:
1002             if (ins->oprs[0].segment != NO_SEG)
1003                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1004                         " quantity of BSS space");
1005             else
1006                 length += ins->oprs[0].offset << (c - 0340);
1007             break;
1008         case 0364:
1009         case 0365:
1010             break;
1011         case 0366:
1012         case 0367:
1013             length++;
1014             break;
1015         case 0370:
1016         case 0371:
1017         case 0372:
1018             break;
1019         case 0373:
1020             length++;
1021             break;
1022         default:               /* can't do it by 'case' statements */
1023             if (c >= 0100 && c <= 0277) {       /* it's an EA */
1024                 ea ea_data;
1025                 int rfield;
1026                 int32_t rflags;
1027                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1028
1029                 if (c <= 0177) {
1030                     /* pick rfield from operand b */
1031                     rflags = regflag(&ins->oprs[c & 7]);
1032                     rfield = regvals[ins->oprs[c & 7].basereg];
1033                 } else {
1034                     rflags = 0;
1035                     rfield = c & 7;
1036                 }
1037
1038                 if (!process_ea
1039                     (&ins->oprs[(c >> 3) & 7], &ea_data, bits,
1040                      ins->addr_size, rfield, rflags, ins->forw_ref)) {
1041                     errfunc(ERR_NONFATAL, "invalid effective address");
1042                     return -1;
1043                 } else {
1044                     ins->rex |= ea_data.rex;
1045                     length += ea_data.size;
1046                 }
1047             } else
1048                 errfunc(ERR_PANIC, "internal instruction table corrupt"
1049                         ": instruction code 0x%02X given", c);
1050         }
1051
1052     ins->rex &= rex_mask;
1053
1054     if (ins->rex & REX_D) {
1055         if (ins->rex & REX_H) {
1056             errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
1057             return -1;
1058         }
1059         if (bits != 64 && ((ins->rex & (REX_W|REX_X|REX_B)) ||
1060                            ins->drexdst > 7)) {
1061             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1062             return -1;
1063         }
1064         length++;
1065     } else if (ins->rex & REX_REAL) {
1066         if (ins->rex & REX_H) {
1067             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1068             return -1;
1069         } else if (bits == 64) {
1070             length++;
1071         } else if ((ins->rex & REX_L) &&
1072                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1073                    cpu >= IF_X86_64) {
1074             /* LOCK-as-REX.R */
1075             assert_no_prefix(ins, PPS_LREP);
1076             length++;
1077         } else {
1078             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1079             return -1;
1080         }
1081     }
1082
1083     return length;
1084 }
1085
/*
 * EMIT_REX: emit a pending REX prefix, if one is needed.
 *
 * Expands in a context that provides `ins', `bits', `offset' and
 * `segment'.  When assembling for 64 bits, the instruction is not a DREX
 * one and some REX_REAL bit is set, one prefix byte made of those bits
 * plus REX_P is passed to out(), ins->rex is cleared so the prefix is
 * emitted only once, and `offset' advances by one.
 *
 * The body is wrapped in do { } while (0) so that `EMIT_REX();' is a
 * single statement and stays correct inside an unbraced if/else.  The
 * prefix goes through a one-byte temporary so that what out() reads does
 * not depend on the width or byte order of ins->rex.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & REX_D) && (ins->rex & REX_REAL) &&             \
            (bits == 64)) {                                             \
            uint8_t emit_rex_byte_ =                                    \
                (uint8_t)((ins->rex & REX_REAL) | REX_P);               \
            out(offset, segment, &emit_rex_byte_, OUT_RAWDATA+1,        \
                NO_SEG, NO_SEG);                                        \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1093
1094 static void gencode(int32_t segment, int32_t offset, int bits,
1095                     insn * ins, const char *codes, int32_t insn_end)
1096 {
1097     static char condval[] = {   /* conditional opcodes */
1098         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1099         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1100         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1101     };
1102     uint8_t c;
1103     uint8_t bytes[4];
1104     int32_t size;
1105     int64_t data;
1106
1107     while (*codes)
1108         switch (c = *codes++) {
1109         case 01:
1110         case 02:
1111         case 03:
1112             EMIT_REX();
1113             out(offset, segment, codes, OUT_RAWDATA + c, NO_SEG, NO_SEG);
1114             codes += c;
1115             offset += c;
1116             break;
1117
1118         case 04:
1119         case 06:
1120             switch (ins->oprs[0].basereg) {
1121             case R_CS:
1122                 bytes[0] = 0x0E + (c == 0x04 ? 1 : 0);
1123                 break;
1124             case R_DS:
1125                 bytes[0] = 0x1E + (c == 0x04 ? 1 : 0);
1126                 break;
1127             case R_ES:
1128                 bytes[0] = 0x06 + (c == 0x04 ? 1 : 0);
1129                 break;
1130             case R_SS:
1131                 bytes[0] = 0x16 + (c == 0x04 ? 1 : 0);
1132                 break;
1133             default:
1134                 errfunc(ERR_PANIC,
1135                         "bizarre 8086 segment register received");
1136             }
1137             out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1138             offset++;
1139             break;
1140
1141         case 05:
1142         case 07:
1143             switch (ins->oprs[0].basereg) {
1144             case R_FS:
1145                 bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0);
1146                 break;
1147             case R_GS:
1148                 bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0);
1149                 break;
1150             default:
1151                 errfunc(ERR_PANIC,
1152                         "bizarre 386 segment register received");
1153             }
1154             out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1155             offset++;
1156             break;
1157
1158         case 010:
1159         case 011:
1160         case 012:
1161         case 013:
1162             EMIT_REX();
1163             bytes[0] = *codes++ + ((regval(&ins->oprs[c - 010])) & 7);
1164             out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1165             offset += 1;
1166             break;
1167
1168         case 014:
1169         case 015:
1170         case 016:
1171         case 017:
1172             if (ins->oprs[c - 014].offset < -128
1173                 || ins->oprs[c - 014].offset > 127) {
1174                 errfunc(ERR_WARNING, "signed byte value exceeds bounds");
1175             }
1176
1177             if (ins->oprs[c - 014].segment != NO_SEG) {
1178                 data = ins->oprs[c - 014].offset;
1179                 out(offset, segment, &data, OUT_ADDRESS + 1,
1180                     ins->oprs[c - 014].segment, ins->oprs[c - 014].wrt);
1181             } else {
1182                 bytes[0] = ins->oprs[c - 014].offset;
1183                 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
1184                     NO_SEG);
1185             }
1186             offset += 1;
1187             break;
1188
1189         case 020:
1190         case 021:
1191         case 022:
1192         case 023:
1193             if (ins->oprs[c - 020].offset < -256
1194                 || ins->oprs[c - 020].offset > 255) {
1195                 errfunc(ERR_WARNING, "byte value exceeds bounds");
1196             }
1197             if (ins->oprs[c - 020].segment != NO_SEG) {
1198                 data = ins->oprs[c - 020].offset;
1199                 out(offset, segment, &data, OUT_ADDRESS + 1,
1200                     ins->oprs[c - 020].segment, ins->oprs[c - 020].wrt);
1201             } else {
1202                 bytes[0] = ins->oprs[c - 020].offset;
1203                 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
1204                     NO_SEG);
1205             }
1206             offset += 1;
1207             break;
1208
1209         case 024:
1210         case 025:
1211         case 026:
1212         case 027:
1213             if (ins->oprs[c - 024].offset < 0
1214                 || ins->oprs[c - 024].offset > 255)
1215                 errfunc(ERR_WARNING, "unsigned byte value exceeds bounds");
1216             if (ins->oprs[c - 024].segment != NO_SEG) {
1217                 data = ins->oprs[c - 024].offset;
1218                 out(offset, segment, &data, OUT_ADDRESS + 1,
1219                     ins->oprs[c - 024].segment, ins->oprs[c - 024].wrt);
1220             } else {
1221                 bytes[0] = ins->oprs[c - 024].offset;
1222                 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
1223                     NO_SEG);
1224             }
1225             offset += 1;
1226             break;
1227
1228         case 030:
1229         case 031:
1230         case 032:
1231         case 033:
1232             data = ins->oprs[c - 030].offset;
1233             if (ins->oprs[c - 030].segment == NO_SEG &&
1234                 ins->oprs[c - 030].wrt == NO_SEG)
1235                 warn_overflow(2, data);
1236             out(offset, segment, &data, OUT_ADDRESS + 2,
1237                 ins->oprs[c - 030].segment, ins->oprs[c - 030].wrt);
1238             offset += 2;
1239             break;
1240
1241         case 034:
1242         case 035:
1243         case 036:
1244         case 037:
1245             if (ins->oprs[c - 034].type & (BITS16 | BITS32))
1246                 size = (ins->oprs[c - 034].type & BITS16) ? 2 : 4;
1247             else
1248                 size = (bits == 16) ? 2 : 4;
1249             data = ins->oprs[c - 034].offset;
1250             if (ins->oprs[c - 034].segment == NO_SEG &&
1251                 ins->oprs[c - 034].wrt == NO_SEG)
1252                 warn_overflow(size, data);
1253             out(offset, segment, &data, OUT_ADDRESS + size,
1254                 ins->oprs[c - 034].segment, ins->oprs[c - 034].wrt);
1255             offset += size;
1256             break;
1257
1258         case 040:
1259         case 041:
1260         case 042:
1261         case 043:
1262             data = ins->oprs[c - 040].offset;
1263             out(offset, segment, &data, OUT_ADDRESS + 4,
1264                 ins->oprs[c - 040].segment, ins->oprs[c - 040].wrt);
1265             offset += 4;
1266             break;
1267
1268         case 044:
1269         case 045:
1270         case 046:
1271         case 047:
1272             data = ins->oprs[c - 044].offset;
1273             size = ins->addr_size >> 3;
1274             if (ins->oprs[c - 044].segment == NO_SEG &&
1275                 ins->oprs[c - 044].wrt == NO_SEG)
1276                 warn_overflow(size, data);
1277             out(offset, segment, &data, OUT_ADDRESS + size,
1278                 ins->oprs[c - 044].segment, ins->oprs[c - 044].wrt);
1279             offset += size;
1280             break;
1281
1282         case 050:
1283         case 051:
1284         case 052:
1285         case 053:
1286             if (ins->oprs[c - 050].segment != segment)
1287                 errfunc(ERR_NONFATAL,
1288                         "short relative jump outside segment");
1289             data = ins->oprs[c - 050].offset - insn_end;
1290             if (data > 127 || data < -128)
1291                 errfunc(ERR_NONFATAL, "short jump is out of range");
1292             bytes[0] = data;
1293             out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1294             offset += 1;
1295             break;
1296
1297         case 054:
1298         case 055:
1299         case 056:
1300         case 057:
1301             data = (int64_t)ins->oprs[c - 054].offset;
1302             out(offset, segment, &data, OUT_ADDRESS + 8,
1303                 ins->oprs[c - 054].segment, ins->oprs[c - 054].wrt);
1304             offset += 8;
1305             break;
1306
1307         case 060:
1308         case 061:
1309         case 062:
1310         case 063:
1311             if (ins->oprs[c - 060].segment != segment) {
1312                 data = ins->oprs[c - 060].offset;
1313                 out(offset, segment, &data,
1314                     OUT_REL2ADR + insn_end - offset,
1315                     ins->oprs[c - 060].segment, ins->oprs[c - 060].wrt);
1316             } else {
1317                 data = ins->oprs[c - 060].offset - insn_end;
1318                 out(offset, segment, &data,
1319                     OUT_ADDRESS + 2, NO_SEG, NO_SEG);
1320             }
1321             offset += 2;
1322             break;
1323
1324         case 064:
1325         case 065:
1326         case 066:
1327         case 067:
1328             if (ins->oprs[c - 064].type & (BITS16 | BITS32 | BITS64))
1329                 size = (ins->oprs[c - 064].type & BITS16) ? 2 : 4;
1330             else
1331                 size = (bits == 16) ? 2 : 4;
1332             if (ins->oprs[c - 064].segment != segment) {
1333                 int32_t reltype = (size == 2 ? OUT_REL2ADR : OUT_REL4ADR);
1334                 data = ins->oprs[c - 064].offset;
1335                 out(offset, segment, &data, reltype + insn_end - offset,
1336                     ins->oprs[c - 064].segment, ins->oprs[c - 064].wrt);
1337             } else {
1338                 data = ins->oprs[c - 064].offset - insn_end;
1339                 out(offset, segment, &data,
1340                     OUT_ADDRESS + size, NO_SEG, NO_SEG);
1341             }
1342             offset += size;
1343             break;
1344
1345         case 070:
1346         case 071:
1347         case 072:
1348         case 073:
1349             if (ins->oprs[c - 070].segment != segment) {
1350                 data = ins->oprs[c - 070].offset;
1351                 out(offset, segment, &data,
1352                     OUT_REL4ADR + insn_end - offset,
1353                     ins->oprs[c - 070].segment, ins->oprs[c - 070].wrt);
1354             } else {
1355                 data = ins->oprs[c - 070].offset - insn_end;
1356                 out(offset, segment, &data,
1357                     OUT_ADDRESS + 4, NO_SEG, NO_SEG);
1358             }
1359             offset += 4;
1360             break;
1361
1362         case 074:
1363         case 075:
1364         case 076:
1365         case 077:
1366             if (ins->oprs[c - 074].segment == NO_SEG)
1367                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1368                         " relocatable");
1369             data = 0L;
1370             out(offset, segment, &data, OUT_ADDRESS + 2,
1371                 outfmt->segbase(1 + ins->oprs[c - 074].segment),
1372                 ins->oprs[c - 074].wrt);
1373             offset += 2;
1374             break;
1375
1376         case 0140:
1377         case 0141:
1378         case 0142:
1379         case 0143:
1380             data = ins->oprs[c - 0140].offset;
1381             if (is_sbyte(ins, c - 0140, 16)) {
1382                 bytes[0] = data;
1383                 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
1384                     NO_SEG);
1385                 offset++;
1386             } else {
1387                 if (ins->oprs[c - 0140].segment == NO_SEG &&
1388                     ins->oprs[c - 0140].wrt == NO_SEG)
1389                     warn_overflow(2, data);
1390                 out(offset, segment, &data, OUT_ADDRESS + 2,
1391                     ins->oprs[c - 0140].segment, ins->oprs[c - 0140].wrt);
1392                 offset += 2;
1393             }
1394             break;
1395
1396         case 0144:
1397         case 0145:
1398         case 0146:
1399         case 0147:
1400             EMIT_REX();
1401             codes++;
1402             bytes[0] = *codes++;
1403             if (is_sbyte(ins, c - 0144, 16))
1404                 bytes[0] |= 2;  /* s-bit */
1405             out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1406             offset++;
1407             break;
1408
1409         case 0150:
1410         case 0151:
1411         case 0152:
1412         case 0153:
1413             data = ins->oprs[c - 0150].offset;
1414             if (is_sbyte(ins, c - 0150, 32)) {
1415                 bytes[0] = data;
1416                 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
1417                     NO_SEG);
1418                 offset++;
1419             } else {
1420                 out(offset, segment, &data, OUT_ADDRESS + 4,
1421                     ins->oprs[c - 0150].segment, ins->oprs[c - 0150].wrt);
1422                 offset += 4;
1423             }
1424             break;
1425
1426         case 0154:
1427         case 0155:
1428         case 0156:
1429         case 0157:
1430             EMIT_REX();
1431             codes++;
1432             bytes[0] = *codes++;
1433             if (is_sbyte(ins, c - 0154, 32))
1434                 bytes[0] |= 2;  /* s-bit */
1435             out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1436             offset++;
1437             break;
1438
1439         case 0160:
1440         case 0161:
1441         case 0162:
1442         case 0163:
1443         case 0164:
1444         case 0165:
1445         case 0166:
1446         case 0167:
1447             break;
1448
1449         case 0170:
1450             EMIT_REX();
1451             bytes[0] = 0;
1452             out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1453             offset += 1;
1454             break;
1455
1456         case 0171:
1457             bytes[0] =
1458                 (ins->drexdst << 4) |
1459                 (ins->rex & REX_OC ? 0x08 : 0) |
1460                 (ins->rex & (REX_R|REX_X|REX_B));
1461             ins->rex = 0;
1462             out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1463             offset++;
1464             break;
1465
1466         case 0300:
1467         case 0301:
1468         case 0302:
1469         case 0303:
1470             break;
1471
1472         case 0310:
1473             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1474                 *bytes = 0x67;
1475                 out(offset, segment, bytes,
1476                     OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1477                 offset += 1;
1478             } else
1479                 offset += 0;
1480             break;
1481
1482         case 0311:
1483             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1484                 *bytes = 0x67;
1485                 out(offset, segment, bytes,
1486                     OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1487                 offset += 1;
1488             } else
1489                 offset += 0;
1490             break;
1491
1492         case 0312:
1493             break;
1494
1495         case 0313:
1496             ins->rex = 0;
1497             break;
1498
1499         case 0320:
1500             if (bits != 16) {
1501                 *bytes = 0x66;
1502                 out(offset, segment, bytes,
1503                     OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1504                 offset += 1;
1505             } else
1506                 offset += 0;
1507             break;
1508
1509         case 0321:
1510             if (bits == 16) {
1511                 *bytes = 0x66;
1512                 out(offset, segment, bytes,
1513                     OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1514                 offset += 1;
1515             } else
1516                 offset += 0;
1517             break;
1518
1519         case 0322:
1520         case 0323:
1521             break;
1522
1523         case 0324:
1524             ins->rex |= REX_W;
1525             break;
1526
1527         case 0330:
1528             *bytes = *codes++ ^ condval[ins->condition];
1529             out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1530             offset += 1;
1531             break;
1532
1533         case 0331:
1534             break;
1535
1536         case 0332:
1537         case 0333:
1538             *bytes = c - 0332 + 0xF2;
1539             out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1540             offset += 1;
1541             break;
1542
1543         case 0334:
1544             if (ins->rex & REX_R) {
1545                 *bytes = 0xF0;
1546                 out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1547                 offset += 1;
1548             }
1549             ins->rex &= ~(REX_L|REX_R);
1550             break;
1551
1552         case 0335:
1553             break;
1554
1555         case 0340:
1556         case 0341:
1557         case 0342:
1558             if (ins->oprs[0].segment != NO_SEG)
1559                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1560             else {
1561                 int32_t size = ins->oprs[0].offset << (c - 0340);
1562                 if (size > 0)
1563                     out(offset, segment, NULL,
1564                         OUT_RESERVE + size, NO_SEG, NO_SEG);
1565                 offset += size;
1566             }
1567             break;
1568
1569         case 0364:
1570         case 0365:
1571             break;
1572
1573         case 0366:
1574         case 0367:
1575             *bytes = c - 0366 + 0x66;
1576             out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1577             offset += 1;
1578             break;
1579
1580         case 0370:
1581         case 0371:
1582         case 0372:
1583             break;
1584
1585         case 0373:
1586             *bytes = bits == 16 ? 3 : 5;
1587             out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
1588             offset += 1;
1589             break;
1590
1591         default:               /* can't do it by 'case' statements */
1592             if (c >= 0100 && c <= 0277) {       /* it's an EA */
1593                 ea ea_data;
1594                 int rfield;
1595                 int32_t rflags;
1596                 uint8_t *p;
1597                 int32_t s;
1598
1599                 if (c <= 0177) {
1600                     /* pick rfield from operand b */
1601                     rflags = regflag(&ins->oprs[c & 7]);
1602                     rfield = regvals[ins->oprs[c & 7].basereg];
1603                 } else {
1604                     /* rfield is constant */
1605                     rflags = 0;
1606                     rfield = c & 7;
1607                 }
1608
1609                 if (!process_ea
1610                     (&ins->oprs[(c >> 3) & 7], &ea_data, bits,
1611                      ins->addr_size, rfield, rflags, ins->forw_ref)) {
1612                     errfunc(ERR_NONFATAL, "invalid effective address");
1613                 }
1614
1615                 p = bytes;
1616                 *p++ = ea_data.modrm;
1617                 if (ea_data.sib_present)
1618                     *p++ = ea_data.sib;
1619
1620                 /* DREX suffixes come between the SIB and the displacement */
1621                 if (ins->rex & REX_D) {
1622                     *p++ =
1623                         (ins->drexdst << 4) |
1624                         (ins->rex & REX_OC ? 0x08 : 0) |
1625                         (ins->rex & (REX_R|REX_X|REX_B));
1626                     ins->rex = 0;
1627                 }
1628
1629                 s = p - bytes;
1630                 out(offset, segment, bytes, OUT_RAWDATA + s,
1631                     NO_SEG, NO_SEG);
1632
1633                 switch (ea_data.bytes) {
1634                 case 0:
1635                     break;
1636                 case 1:
1637                     if (ins->oprs[(c >> 3) & 7].segment != NO_SEG) {
1638                         data = ins->oprs[(c >> 3) & 7].offset;
1639                         out(offset, segment, &data, OUT_ADDRESS + 1,
1640                             ins->oprs[(c >> 3) & 7].segment,
1641                             ins->oprs[(c >> 3) & 7].wrt);
1642                     } else {
1643                         *bytes = ins->oprs[(c >> 3) & 7].offset;
1644                         out(offset, segment, bytes, OUT_RAWDATA + 1,
1645                             NO_SEG, NO_SEG);
1646                     }
1647                     s++;
1648                     break;
1649                 case 8:
1650                 case 2:
1651                 case 4:
1652                     data = ins->oprs[(c >> 3) & 7].offset;
1653                     out(offset, segment, &data,
1654                         (ea_data.rip ?  OUT_REL4ADR : OUT_ADDRESS)
1655                         + ea_data.bytes,
1656                         ins->oprs[(c >> 3) & 7].segment,
1657                         ins->oprs[(c >> 3) & 7].wrt);
1658                     s += ea_data.bytes;
1659                     break;
1660                 }
1661                 offset += s;
1662             } else
1663                 errfunc(ERR_PANIC, "internal instruction table corrupt"
1664                         ": instruction code 0x%02X given", c);
1665         }
1666 }
1667
1668 static int32_t regflag(const operand * o)
1669 {
1670     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1671         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1672     }
1673     return reg_flags[o->basereg];
1674 }
1675
1676 static int32_t regval(const operand * o)
1677 {
1678     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1679         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1680     }
1681     return regvals[o->basereg];
1682 }
1683
1684 static int op_rexflags(const operand * o, int mask)
1685 {
1686     int32_t flags;
1687     int val;
1688
1689     if (o->basereg < EXPR_REG_START || o->basereg >= REG_ENUM_LIMIT) {
1690         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1691     }
1692
1693     flags = reg_flags[o->basereg];
1694     val = regvals[o->basereg];
1695
1696     return rexflags(val, flags, mask);
1697 }
1698
1699 static int rexflags(int val, int32_t flags, int mask)
1700 {
1701     int rex = 0;
1702
1703     if (val >= 8)
1704         rex |= REX_B|REX_X|REX_R;
1705     if (flags & BITS64)
1706         rex |= REX_W;
1707     if (!(REG_HIGH & ~flags))   /* AH, CH, DH, BH */
1708         rex |= REX_H;
1709     else if (!(REG8 & ~flags) && val >= 4) /* SPL, BPL, SIL, DIL */
1710         rex |= REX_P;
1711
1712     return rex & mask;
1713 }
1714
1715 static int matches(const struct itemplate *itemp, insn * instruction, int bits)
1716 {
1717     int i, size[MAX_OPERANDS], asize, oprs, ret;
1718
1719     ret = 100;
1720
1721     /*
1722      * Check the opcode
1723      */
1724     if (itemp->opcode != instruction->opcode)
1725         return 0;
1726
1727     /*
1728      * Count the operands
1729      */
1730     if (itemp->operands != instruction->operands)
1731         return 0;
1732
1733     /*
1734      * Check that no spurious colons or TOs are present
1735      */
1736     for (i = 0; i < itemp->operands; i++)
1737         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1738             return 0;
1739
1740     /*
1741      * Check that the operand flags all match up
1742      */
1743     for (i = 0; i < itemp->operands; i++) {
1744         if (itemp->opd[i] & SAME_AS) {
1745             int j = itemp->opd[i] & ~SAME_AS;
1746             if (instruction->oprs[i].type != instruction->oprs[j].type ||
1747                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
1748                 return 0;
1749         } else  if (itemp->opd[i] & ~instruction->oprs[i].type ||
1750             ((itemp->opd[i] & SIZE_MASK) &&
1751              ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) {
1752             if ((itemp->opd[i] & ~instruction->oprs[i].type & ~SIZE_MASK) ||
1753                 (instruction->oprs[i].type & SIZE_MASK))
1754                 return 0;
1755             else
1756                 return 1;
1757         }
1758     }
1759
1760     /*
1761      * Check operand sizes
1762      */
1763     if (itemp->flags & IF_ARMASK) {
1764         memset(size, 0, sizeof size);
1765
1766         switch (itemp->flags & IF_ARMASK) {
1767         case IF_AR0:
1768             i = 0;
1769             break;
1770         case IF_AR1:
1771             i = 1;
1772             break;
1773         case IF_AR2:
1774             i = 2;
1775             break;
1776         case IF_AR3:
1777             i = 3;
1778             break;
1779         default:
1780             break;              /* Shouldn't happen */
1781         }
1782         switch (itemp->flags & IF_SMASK) {
1783         case IF_SB:
1784             size[i] = BITS8;
1785             break;
1786         case IF_SW:
1787             size[i] = BITS16;
1788             break;
1789         case IF_SD:
1790             size[i] = BITS32;
1791             break;
1792         case IF_SQ:
1793             size[i] = BITS64;
1794             break;
1795         case IF_SO:
1796             size[i] = BITS128;
1797             break;
1798         default:
1799             break;
1800         }
1801     } else {
1802         asize = 0;
1803         switch (itemp->flags & IF_SMASK) {
1804         case IF_SB:
1805             asize = BITS8;
1806             oprs = itemp->operands;
1807             break;
1808         case IF_SW:
1809             asize = BITS16;
1810             oprs = itemp->operands;
1811             break;
1812         case IF_SD:
1813             asize = BITS32;
1814             oprs = itemp->operands;
1815             break;
1816         case IF_SQ:
1817             asize = BITS64;
1818             oprs = itemp->operands;
1819             break;
1820         case IF_SO:
1821             asize = BITS128;
1822             oprs = itemp->operands;
1823             break;
1824         default:
1825             break;
1826         }
1827         for (i = 0; i < MAX_OPERANDS; i++)
1828             size[i] = asize;
1829     }
1830
1831     if (itemp->flags & (IF_SM | IF_SM2)) {
1832         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
1833         asize = 0;
1834         for (i = 0; i < oprs; i++) {
1835             if ((asize = itemp->opd[i] & SIZE_MASK) != 0) {
1836                 int j;
1837                 for (j = 0; j < oprs; j++)
1838                     size[j] = asize;
1839                 break;
1840             }
1841         }
1842     } else {
1843         oprs = itemp->operands;
1844     }
1845
1846     for (i = 0; i < itemp->operands; i++) {
1847         if (!(itemp->opd[i] & SIZE_MASK) &&
1848             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
1849             return 2;
1850     }
1851
1852     /*
1853      * Check template is okay at the set cpu level
1854      */
1855     if (((itemp->flags & IF_PLEVEL) > cpu))
1856         return 3;
1857
1858     /*
1859      * Check if instruction is available in long mode
1860      */
1861     if ((itemp->flags & IF_NOLONG) && (bits == 64))
1862         return 4;
1863
1864     /*
1865      * Check if special handling needed for Jumps
1866      */
1867     if ((uint8_t)(itemp->code[0]) >= 0370)
1868         return 99;
1869
1870     return ret;
1871 }
1872
/*
 * Build the ModRM/SIB description of an effective-address operand.
 *
 *   input    - the operand to encode (register or memory reference)
 *   output   - receives modrm, sib, sib_present, bytes (number of
 *              displacement bytes), rip and size.  output->rex is only
 *              OR-ed into; it is not cleared here.
 *   bits     - assembly mode; only tested against 64, for pure offsets
 *   addrbits - address size in effect (16, 32 or 64)
 *   rfield   - value placed in bits 3..5 of the ModRM byte
 *   rflags   - register flags belonging to rfield, used for its REX bits
 *   forw_ref - when non-zero the offset value is not used to pick the
 *              "no displacement" or "8-bit displacement" forms
 *
 * Returns output on success, or NULL if the operand cannot be expressed
 * as an effective address.
 */
static ea *process_ea(operand * input, ea * output, int bits,
                      int addrbits, int rfield, int32_t rflags, int forw_ref)
{
    output->rip = false;

    /* REX flags for the rfield operand */
    output->rex |= rexflags(rfield, rflags, REX_R|REX_P|REX_W|REX_H);

    if (!(REGISTER & ~input->type)) {   /* register direct */
        int i;
        int32_t f;

        if (input->basereg < EXPR_REG_START /* Verify as Register */
            || input->basereg >= REG_ENUM_LIMIT)
            return NULL;
        f = regflag(input);
        i = regvals[input->basereg];

        if (REG_EA & ~f)
            return NULL;        /* Invalid EA register */

        output->rex |= op_rexflags(input, REX_B|REX_P|REX_W|REX_H);

        output->sib_present = false;             /* no SIB necessary */
        output->bytes = 0;  /* no offset necessary either */
        /* mod = 3 (0xC0): the r/m field names a register */
        output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
    } else {                    /* it's a memory reference */
        if (input->basereg == -1
            && (input->indexreg == -1 || input->scale == 0)) {
            /* it's a pure offset */
            if (bits == 64 && (~input->type & IP_REL)) {
              /*
               * 64-bit mode without IP_REL: emit rm = 4 plus a SIB byte
               * with index = 4 and base = 5, followed by a 4-byte
               * displacement, and leave output->rip false.
               */
              int scale, index, base;
              output->sib_present = true;
              scale = 0;
              index = 4;
              base = 5;
              output->sib = (scale << 6) | (index << 3) | base;
              output->bytes = 4;
              output->modrm = 4 | ((rfield & 7) << 3);
              output->rip = false;
            } else {
              /*
               * mod = 0 with rm = 6 (16-bit addressing) or rm = 5
               * (otherwise), followed by a 2- or 4-byte displacement.
               * In 64-bit mode this form is flagged as RIP-relative.
               */
              output->sib_present = false;
              output->bytes = (addrbits != 16 ? 4 : 2);
              output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
              output->rip = bits == 64;
            }
        } else {                /* it's an indirection */
            int i = input->indexreg, b = input->basereg, s = input->scale;
            int32_t o = input->offset, seg = input->segment;
            int hb = input->hintbase, ht = input->hinttype;
            int t;
            int it, bt;
            int32_t ix, bx;     /* register flags */

            if (s == 0)
                i = -1;         /* make this easy, at least */

            /* it/ix: index register number and flags (-1/0 if none) */
            if (i >= EXPR_REG_START && i < REG_ENUM_LIMIT) {
                it = regvals[i];
                ix = reg_flags[i];
            } else {
                it = -1;
                ix = 0;
            }

            /* bt/bx: base register number and flags (-1/0 if none) */
            if (b >= EXPR_REG_START && b < REG_ENUM_LIMIT) {
                bt = regvals[b];
                bx = reg_flags[b];
            } else {
                bt = -1;
                bx = 0;
            }

            /* check for a 32/64-bit memory reference... */
            if ((ix|bx) & (BITS32|BITS64)) {
                /* it must be a 32/64-bit memory reference. Firstly we have
                 * to check that all registers involved are type E/Rxx. */
                int32_t sok = BITS32|BITS64;    /* address sizes still possible */

                if (it != -1) {
                    if (!(REG64 & ~ix) || !(REG32 & ~ix))
                        sok &= ix;
                    else
                        return NULL;
                }

                if (bt != -1) {
                    if (REG_GPR & ~bx)
                        return NULL; /* Invalid register */
                    if (~sok & bx & SIZE_MASK)
                        return NULL; /* Invalid size */
                    sok &= bx;
                }

                /* While we're here, ensure the user didn't specify
                   WORD or QWORD. */
                if (input->disp_size == 16 || input->disp_size == 64)
                    return NULL;

                /* the registers used must agree with the address size */
                if (addrbits == 16 ||
                    (addrbits == 32 && !(sok & BITS32)) ||
                    (addrbits == 64 && !(sok & BITS64)))
                    return NULL;

                /* now reorganize base/index */
                if (s == 1 && bt != it && bt != -1 && it != -1 &&
                    ((hb == b && ht == EAH_NOTBASE)
                     || (hb == i && ht == EAH_MAKEBASE))) {
                    /* swap if hints say so */
                    t = bt, bt = it, it = t;
                    t = bx, bx = ix, ix = t;
                }
                if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
                    bt = -1, bx = 0, s++;
                if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
                    /* make single reg base, unless hint */
                    bt = it, bx = ix, it = -1, ix = 0;
                }
                if (((s == 2 && it != REG_NUM_ESP
                      && !(input->eaflags & EAF_TIMESTWO)) || s == 3
                     || s == 5 || s == 9) && bt == -1)
                    bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
                if (it == -1 && (bt & 7) != REG_NUM_ESP
                    && (input->eaflags & EAF_TIMESTWO))
                    it = bt, ix = bx, bt = -1, bx = 0, s = 1;
                /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
                if (s == 1 && it == REG_NUM_ESP) {
                    /* swap ESP into base if scale is 1 */
                    t = it, it = bt, bt = t;
                    t = ix, ix = bx, bx = t;
                }
                if (it == REG_NUM_ESP
                    || (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
                    return NULL;        /* wrong, for various reasons */

                output->rex |= rexflags(it, ix, REX_X);
                output->rex |= rexflags(bt, bx, REX_B);

                if (it == -1 && (bt & 7) != REG_NUM_ESP) {
                    /* no SIB needed */
                    int mod, rm;

                    if (bt == -1) {
                        rm = 5;
                        mod = 0;
                    } else {
                        rm = (bt & 7);
                        /*
                         * mod 0: no displacement; mod 1: one byte;
                         * mod 2: full displacement.  A base whose low
                         * three bits equal REG_NUM_EBP never uses mod 0
                         * (presumably because that encoding is the
                         * displacement-only form).
                         */
                        if (rm != REG_NUM_EBP && o == 0 &&
                                seg == NO_SEG && !forw_ref &&
                                !(input->eaflags &
                                  (EAF_BYTEOFFS | EAF_WORDOFFS)))
                            mod = 0;
                        else if (input->eaflags & EAF_BYTEOFFS ||
                                 (o >= -128 && o <= 127 && seg == NO_SEG
                                  && !forw_ref
                                  && !(input->eaflags & EAF_WORDOFFS)))
                            mod = 1;
                        else
                            mod = 2;
                    }

                    output->sib_present = false;
                    /* no base or mod 2: 4 bytes; otherwise mod (0 or 1) bytes */
                    output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
                    output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
                } else {
                    /* we need a SIB */
                    int mod, scale, index, base;

                    if (it == -1)
                        index = 4, s = 1;       /* index field 4: no index register */
                    else
                        index = (it & 7);

                    /* translate the scale factor into the 2-bit SIB field */
                    switch (s) {
                    case 1:
                        scale = 0;
                        break;
                    case 2:
                        scale = 1;
                        break;
                    case 4:
                        scale = 2;
                        break;
                    case 8:
                        scale = 3;
                        break;
                    default:   /* then what the smeg is it? */
                        return NULL;    /* panic */
                    }

                    if (bt == -1) {
                        base = 5;
                        mod = 0;
                    } else {
                        base = (bt & 7);
                        /* same displacement-size selection as the no-SIB case */
                        if (base != REG_NUM_EBP && o == 0 &&
                                    seg == NO_SEG && !forw_ref &&
                                    !(input->eaflags &
                                      (EAF_BYTEOFFS | EAF_WORDOFFS)))
                            mod = 0;
                        else if (input->eaflags & EAF_BYTEOFFS ||
                                 (o >= -128 && o <= 127 && seg == NO_SEG
                                  && !forw_ref
                                  && !(input->eaflags & EAF_WORDOFFS)))
                            mod = 1;
                        else
                            mod = 2;
                    }

                    output->sib_present = true;
                    output->bytes =  (bt == -1 || mod == 2 ? 4 : mod);
                    /* rm = 4 announces the SIB byte */
                    output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
                    output->sib = (scale << 6) | (index << 3) | base;
                }
            } else {            /* it's 16-bit */
                int mod, rm;

                /* check for 64-bit long mode */
                if (addrbits == 64)
                    return NULL;

                /* check all registers are BX, BP, SI or DI */
                if ((b != -1 && b != R_BP && b != R_BX && b != R_SI
                     && b != R_DI) || (i != -1 && i != R_BP && i != R_BX
                                       && i != R_SI && i != R_DI))
                    return NULL;

                /* ensure the user didn't specify DWORD/QWORD */
                if (input->disp_size == 32 || input->disp_size == 64)
                    return NULL;

                if (s != 1 && i != -1)
                    return NULL;        /* no can do, in 16-bit EA */
                /* an index with no base is treated as the base */
                if (b == -1 && i != -1) {
                    int tmp = b;
                    b = i;
                    i = tmp;
                }               /* swap */
                /* with two registers, SI/DI belongs in the index slot */
                if ((b == R_SI || b == R_DI) && i != -1) {
                    int tmp = b;
                    b = i;
                    i = tmp;
                }
                /* have BX/BP as base, SI/DI index */
                if (b == i)
                    return NULL;        /* shouldn't ever happen, in theory */
                if (i != -1 && b != -1 &&
                    (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
                    return NULL;        /* invalid combinations */
                if (b == -1)    /* pure offset: handled above */
                    return NULL;        /* so if it gets to here, panic! */

                /* map the base/index combination onto the r/m field */
                rm = -1;
                if (i != -1)
                    switch (i * 256 + b) {
                    case R_SI * 256 + R_BX:
                        rm = 0;
                        break;
                    case R_DI * 256 + R_BX:
                        rm = 1;
                        break;
                    case R_SI * 256 + R_BP:
                        rm = 2;
                        break;
                    case R_DI * 256 + R_BP:
                        rm = 3;
                        break;
                } else
                    switch (b) {
                    case R_SI:
                        rm = 4;
                        break;
                    case R_DI:
                        rm = 5;
                        break;
                    case R_BP:
                        rm = 6;
                        break;
                    case R_BX:
                        rm = 7;
                        break;
                    }
                if (rm == -1)   /* can't happen, in theory */
                    return NULL;        /* so panic if it does */

                /*
                 * mod 0 with rm 6 is the encoding used above for a pure
                 * 16-bit offset, so rm 6 always gets a displacement.
                 */
                if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
                    !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
                    mod = 0;
                else if (input->eaflags & EAF_BYTEOFFS ||
                         (o >= -128 && o <= 127 && seg == NO_SEG
                          && !forw_ref
                          && !(input->eaflags & EAF_WORDOFFS)))
                    mod = 1;
                else
                    mod = 2;

                output->sib_present = false;    /* no SIB - it's 16-bit */
                output->bytes = mod;    /* bytes of offset needed */
                output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
            }
        }
    }

    /* total length: the ModRM byte, the optional SIB byte, the displacement */
    output->size = 1 + output->sib_present + output->bytes;
    return output;
}
2179
2180 static void add_asp(insn *ins, int addrbits)
2181 {
2182     int j, valid;
2183     int defdisp;
2184
2185     valid = (addrbits == 64) ? 64|32 : 32|16;
2186
2187     switch (ins->prefixes[PPS_ASIZE]) {
2188     case P_A16:
2189         valid &= 16;
2190         break;
2191     case P_A32:
2192         valid &= 32;
2193         break;
2194     case P_A64:
2195         valid &= 64;
2196         break;
2197     case P_ASP:
2198         valid &= (addrbits == 32) ? 16 : 32;
2199         break;
2200     default:
2201         break;
2202     }
2203
2204     for (j = 0; j < ins->operands; j++) {
2205         if (!(MEMORY & ~ins->oprs[j].type)) {
2206             int32_t i, b;
2207
2208             /* Verify as Register */
2209             if (ins->oprs[j].indexreg < EXPR_REG_START
2210                 || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
2211                 i = 0;
2212             else
2213                 i = reg_flags[ins->oprs[j].indexreg];
2214
2215             /* Verify as Register */
2216             if (ins->oprs[j].basereg < EXPR_REG_START
2217                 || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
2218                 b = 0;
2219             else
2220                 b = reg_flags[ins->oprs[j].basereg];
2221
2222             if (ins->oprs[j].scale == 0)
2223                 i = 0;
2224
2225             if (!i && !b) {
2226                 int ds = ins->oprs[j].disp_size;
2227                 if ((addrbits != 64 && ds > 8) ||
2228                     (addrbits == 64 && ds == 16))
2229                     valid &= ds;
2230             } else {
2231                 if (!(REG16 & ~b))
2232                     valid &= 16;
2233                 if (!(REG32 & ~b))
2234                     valid &= 32;
2235                 if (!(REG64 & ~b))
2236                     valid &= 64;
2237
2238                 if (!(REG16 & ~i))
2239                     valid &= 16;
2240                 if (!(REG32 & ~i))
2241                     valid &= 32;
2242                 if (!(REG64 & ~i))
2243                     valid &= 64;
2244             }
2245         }
2246     }
2247
2248     if (valid & addrbits) {
2249         ins->addr_size = addrbits;
2250     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2251         /* Add an address size prefix */
2252         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2253         ins->prefixes[PPS_ASIZE] = pref;
2254         ins->addr_size = (addrbits == 32) ? 16 : 32;
2255     } else {
2256         /* Impossible... */
2257         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2258         ins->addr_size = addrbits; /* Error recovery */
2259     }
2260
2261     defdisp = ins->addr_size == 16 ? 16 : 32;
2262
2263     for (j = 0; j < ins->operands; j++) {
2264         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2265             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
2266             != ins->addr_size) {
2267             /* mem_offs sizes must match the address size; if not,
2268                strip the MEM_OFFS bit and match only EA instructions */
2269             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2270         }
2271     }
2272 }