Auto-generate 0x67 prefixes without the need for \30x codes
[nasm.git] / disasm.c
blob724d0bf6832658b24e9531907cd53c21d5ed9c73
/* disasm.c   where all the _work_ gets done in the Netwide Disassembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * initial version 27/iii/95 by Simon Tatham
 */
11 #include <stdio.h>
12 #include <string.h>
13 #include <limits.h>
14 #include <inttypes.h>
16 #include "nasm.h"
17 #include "disasm.h"
18 #include "sync.h"
19 #include "insns.h"
21 #include "names.c"
/*
 * Flags that go into the `segment' field of `insn' structures
 * during disassembly.
 */
#define SEG_RELATIVE      1     /* offset is relative to the end of the insn */
#define SEG_32BIT         2     /* operand belongs to a 32-bit context */
#define SEG_RMREG         4     /* operand is a register; basereg holds its number */
#define SEG_DISP8         8     /* effective address carries an 8-bit displacement */
#define SEG_DISP16       16     /* effective address carries a 16-bit displacement */
#define SEG_DISP32       32     /* effective address carries a 32-bit displacement */
#define SEG_NODISP       64     /* effective address has no displacement */
#define SEG_SIGNED      128     /* byte immediate is printed with an explicit sign */
#define SEG_64BIT       256     /* operand belongs to a 64-bit context */
37 #include "regdis.c"
/*
 * Prefix information
 *
 * Filled in by disasm() while it scans the prefix bytes in front of an
 * opcode, and consulted read-only by matches().  The "present" members
 * hold the raw prefix byte that was seen, or 0 when it was absent.
 */
struct prefix_info {
    uint8_t osize;              /* Operand size */
    uint8_t asize;              /* Address size */
    uint8_t osp;                /* Operand size prefix present */
    uint8_t asp;                /* Address size prefix present */
    uint8_t rep;                /* Rep prefix present */
    uint8_t seg;                /* Segment override prefix present */
    uint8_t lock;               /* Lock prefix present */
    uint8_t rex;                /* Rex prefix present */
};
/*
 * Little-endian fetch helpers for the instruction byte stream.
 *
 * The multi-byte readers assemble the value one byte at a time, so they
 * are valid for any alignment and on any host byte order.  (Casting the
 * byte pointer to a wider integer type and dereferencing it would be
 * undefined behaviour: it breaks the effective-type rule and may be
 * misaligned.)
 */
#define getu8(x) (*(uint8_t *)(x))

/* Read an unsigned 16-bit little-endian value starting at data. */
static uint16_t getu16(const uint8_t *data)
{
    return (uint16_t)((unsigned)data[0] | ((unsigned)data[1] << 8));
}

/* Read an unsigned 32-bit little-endian value starting at data. */
static uint32_t getu32(const uint8_t *data)
{
    return (uint32_t)getu16(data) | ((uint32_t)getu16(data + 2) << 16);
}

/* Read an unsigned 64-bit little-endian value starting at data. */
static uint64_t getu64(const uint8_t *data)
{
    return (uint64_t)getu32(data) | ((uint64_t)getu32(data + 4) << 32);
}

/* Signed variants: reinterpret the unsigned value at the same width. */
#define gets8(x)  ((int8_t)getu8(x))
#define gets16(x) ((int16_t)getu16(x))
#define gets32(x) ((int32_t)getu32(x))
#define gets64(x) ((int64_t)getu64(x))
79 /* Important: regval must already have been adjusted for rex extensions */
80 static enum reg_enum whichreg(int32_t regflags, int regval, int rex)
82 if (!(regflags & (REGISTER|REGMEM)))
83 return 0; /* Registers not permissible?! */
85 regflags |= REGISTER;
87 if (!(REG_AL & ~regflags))
88 return R_AL;
89 if (!(REG_AX & ~regflags))
90 return R_AX;
91 if (!(REG_EAX & ~regflags))
92 return R_EAX;
93 if (!(REG_RAX & ~regflags))
94 return R_RAX;
95 if (!(REG_DL & ~regflags))
96 return R_DL;
97 if (!(REG_DX & ~regflags))
98 return R_DX;
99 if (!(REG_EDX & ~regflags))
100 return R_EDX;
101 if (!(REG_RDX & ~regflags))
102 return R_RDX;
103 if (!(REG_CL & ~regflags))
104 return R_CL;
105 if (!(REG_CX & ~regflags))
106 return R_CX;
107 if (!(REG_ECX & ~regflags))
108 return R_ECX;
109 if (!(REG_RCX & ~regflags))
110 return R_RCX;
111 if (!(FPU0 & ~regflags))
112 return R_ST0;
113 if (!(REG_CS & ~regflags))
114 return (regval == 1) ? R_CS : 0;
115 if (!(REG_DESS & ~regflags))
116 return (regval == 0 || regval == 2
117 || regval == 3 ? rd_sreg[regval] : 0);
118 if (!(REG_FSGS & ~regflags))
119 return (regval == 4 || regval == 5 ? rd_sreg[regval] : 0);
120 if (!(REG_SEG67 & ~regflags))
121 return (regval == 6 || regval == 7 ? rd_sreg[regval] : 0);
123 /* All the entries below look up regval in an 16-entry array */
124 if (regval < 0 || regval > 15)
125 return 0;
127 if (!(REG8 & ~regflags)) {
128 if (rex & REX_P)
129 return rd_reg8_rex[regval];
130 else
131 return rd_reg8[regval];
133 if (!(REG16 & ~regflags))
134 return rd_reg16[regval];
135 if (!(REG32 & ~regflags))
136 return rd_reg32[regval];
137 if (!(REG64 & ~regflags))
138 return rd_reg64[regval];
139 if (!(REG_SREG & ~regflags))
140 return rd_sreg[regval & 7]; /* Ignore REX */
141 if (!(REG_CREG & ~regflags))
142 return rd_creg[regval];
143 if (!(REG_DREG & ~regflags))
144 return rd_dreg[regval];
145 if (!(REG_TREG & ~regflags)) {
146 if (rex & REX_P)
147 return 0; /* TR registers are ill-defined with rex */
148 return rd_treg[regval];
150 if (!(FPUREG & ~regflags))
151 return rd_fpureg[regval & 7]; /* Ignore REX */
152 if (!(MMXREG & ~regflags))
153 return rd_mmxreg[regval & 7]; /* Ignore REX */
154 if (!(XMMREG & ~regflags))
155 return rd_xmmreg[regval];
157 return 0;
160 static const char *whichcond(int condval)
162 static int conds[] = {
163 C_O, C_NO, C_C, C_NC, C_Z, C_NZ, C_NA, C_A,
164 C_S, C_NS, C_PE, C_PO, C_L, C_NL, C_NG, C_G
166 return conditions[conds[condval]];
170 * Process a DREX suffix
172 static uint8_t *do_drex(uint8_t *data, insn *ins)
174 uint8_t drex = *data++;
175 operand *dst = &ins->oprs[ins->drexdst];
177 if ((drex & 8) != ((ins->rex & REX_OC) ? 8 : 0))
178 return NULL; /* OC0 mismatch */
179 ins->rex = (ins->rex & ~7) | (drex & 7);
181 dst->segment = SEG_RMREG;
182 dst->basereg = drex >> 4;
183 return data;
188 * Process an effective address (ModRM) specification.
190 static uint8_t *do_ea(uint8_t *data, int modrm, int asize,
191 int segsize, operand * op, insn *ins)
193 int mod, rm, scale, index, base;
194 int rex;
195 uint8_t sib = 0;
197 mod = (modrm >> 6) & 03;
198 rm = modrm & 07;
200 if (mod != 3 && rm == 4 && asize != 16)
201 sib = *data++;
203 if (ins->rex & REX_D) {
204 data = do_drex(data, ins);
205 if (!data)
206 return NULL;
208 rex = ins->rex;
210 if (mod == 3) { /* pure register version */
211 op->basereg = rm+(rex & REX_B ? 8 : 0);
212 op->segment |= SEG_RMREG;
213 return data;
216 op->addr_size = 0;
217 op->eaflags = 0;
219 if (asize == 16) {
221 * <mod> specifies the displacement size (none, byte or
222 * word), and <rm> specifies the register combination.
223 * Exception: mod=0,rm=6 does not specify [BP] as one might
224 * expect, but instead specifies [disp16].
226 op->indexreg = op->basereg = -1;
227 op->scale = 1; /* always, in 16 bits */
228 switch (rm) {
229 case 0:
230 op->basereg = R_BX;
231 op->indexreg = R_SI;
232 break;
233 case 1:
234 op->basereg = R_BX;
235 op->indexreg = R_DI;
236 break;
237 case 2:
238 op->basereg = R_BP;
239 op->indexreg = R_SI;
240 break;
241 case 3:
242 op->basereg = R_BP;
243 op->indexreg = R_DI;
244 break;
245 case 4:
246 op->basereg = R_SI;
247 break;
248 case 5:
249 op->basereg = R_DI;
250 break;
251 case 6:
252 op->basereg = R_BP;
253 break;
254 case 7:
255 op->basereg = R_BX;
256 break;
258 if (rm == 6 && mod == 0) { /* special case */
259 op->basereg = -1;
260 if (segsize != 16)
261 op->addr_size = 16;
262 mod = 2; /* fake disp16 */
264 switch (mod) {
265 case 0:
266 op->segment |= SEG_NODISP;
267 break;
268 case 1:
269 op->segment |= SEG_DISP8;
270 op->offset = (int8_t)*data++;
271 break;
272 case 2:
273 op->segment |= SEG_DISP16;
274 op->offset = *data++;
275 op->offset |= ((unsigned)*data++) << 8;
276 break;
278 return data;
279 } else {
281 * Once again, <mod> specifies displacement size (this time
282 * none, byte or *dword*), while <rm> specifies the base
283 * register. Again, [EBP] is missing, replaced by a pure
284 * disp32 (this time that's mod=0,rm=*5*) in 32-bit mode,
285 * and RIP-relative addressing in 64-bit mode.
287 * However, rm=4
288 * indicates not a single base register, but instead the
289 * presence of a SIB byte...
291 int a64 = asize == 64;
293 op->indexreg = -1;
295 if (a64)
296 op->basereg = rd_reg64[rm | ((rex & REX_B) ? 8 : 0)];
297 else
298 op->basereg = rd_reg32[rm | ((rex & REX_B) ? 8 : 0)];
300 if (rm == 5 && mod == 0) {
301 if (segsize == 64) {
302 op->eaflags |= EAF_REL;
303 op->segment |= SEG_RELATIVE;
304 mod = 2; /* fake disp32 */
307 if (asize != 64)
308 op->addr_size = asize;
310 op->basereg = -1;
311 mod = 2; /* fake disp32 */
314 if (rm == 4) { /* process SIB */
315 scale = (sib >> 6) & 03;
316 index = (sib >> 3) & 07;
317 base = sib & 07;
319 op->scale = 1 << scale;
321 if (index == 4)
322 op->indexreg = -1; /* ESP/RSP/R12 cannot be an index */
323 else if (a64)
324 op->indexreg = rd_reg64[index | ((rex & REX_X) ? 8 : 0)];
325 else
326 op->indexreg = rd_reg64[index | ((rex & REX_X) ? 8 : 0)];
328 if (base == 5 && mod == 0) {
329 op->basereg = -1;
330 mod = 2; /* Fake disp32 */
331 } else if (a64)
332 op->basereg = rd_reg64[base | ((rex & REX_B) ? 8 : 0)];
333 else
334 op->basereg = rd_reg32[base | ((rex & REX_B) ? 8 : 0)];
336 if (segsize != 32)
337 op->addr_size = 32;
340 switch (mod) {
341 case 0:
342 op->segment |= SEG_NODISP;
343 break;
344 case 1:
345 op->segment |= SEG_DISP8;
346 op->offset = gets8(data);
347 data++;
348 break;
349 case 2:
350 op->segment |= SEG_DISP32;
351 op->offset = getu32(data);
352 data += 4;
353 break;
355 return data;
360 * Determine whether the instruction template in t corresponds to the data
361 * stream in data. Return the number of bytes matched if so.
363 static int matches(const struct itemplate *t, uint8_t *data,
364 const struct prefix_info *prefix, int segsize, insn *ins)
366 uint8_t *r = (uint8_t *)(t->code);
367 uint8_t *origdata = data;
368 int a_used = FALSE, o_used = FALSE;
369 enum prefixes drep = 0;
370 uint8_t lock = prefix->lock;
371 int osize = prefix->osize;
372 int asize = prefix->asize;
373 int i;
375 for (i = 0; i < MAX_OPERANDS; i++) {
376 ins->oprs[i].segment = ins->oprs[i].addr_size =
377 (segsize == 64 ? SEG_64BIT : segsize == 32 ? SEG_32BIT : 0);
379 ins->condition = -1;
380 ins->rex = prefix->rex;
382 if (t->flags & (segsize == 64 ? IF_NOLONG : IF_LONG))
383 return FALSE;
385 if (prefix->rep == 0xF2)
386 drep = P_REPNE;
387 else if (prefix->rep == 0xF3)
388 drep = P_REP;
390 while (*r) {
391 int c = *r++;
393 /* FIX: change this into a switch */
394 if (c >= 01 && c <= 03) {
395 while (c--)
396 if (*r++ != *data++)
397 return FALSE;
398 } else if (c == 04) {
399 switch (*data++) {
400 case 0x07:
401 ins->oprs[0].basereg = 0;
402 break;
403 case 0x17:
404 ins->oprs[0].basereg = 2;
405 break;
406 case 0x1F:
407 ins->oprs[0].basereg = 3;
408 break;
409 default:
410 return FALSE;
412 } else if (c == 05) {
413 switch (*data++) {
414 case 0xA1:
415 ins->oprs[0].basereg = 4;
416 break;
417 case 0xA9:
418 ins->oprs[0].basereg = 5;
419 break;
420 default:
421 return FALSE;
423 } else if (c == 06) {
424 switch (*data++) {
425 case 0x06:
426 ins->oprs[0].basereg = 0;
427 break;
428 case 0x0E:
429 ins->oprs[0].basereg = 1;
430 break;
431 case 0x16:
432 ins->oprs[0].basereg = 2;
433 break;
434 case 0x1E:
435 ins->oprs[0].basereg = 3;
436 break;
437 default:
438 return FALSE;
440 } else if (c == 07) {
441 switch (*data++) {
442 case 0xA0:
443 ins->oprs[0].basereg = 4;
444 break;
445 case 0xA8:
446 ins->oprs[0].basereg = 5;
447 break;
448 default:
449 return FALSE;
451 } else if (c >= 010 && c <= 013) {
452 int t = *r++, d = *data++;
453 if (d < t || d > t + 7)
454 return FALSE;
455 else {
456 ins->oprs[c - 010].basereg = (d-t)+
457 (ins->rex & REX_B ? 8 : 0);
458 ins->oprs[c - 010].segment |= SEG_RMREG;
460 } else if (c >= 014 && c <= 017) {
461 ins->oprs[c - 014].offset = (int8_t)*data++;
462 ins->oprs[c - 014].segment |= SEG_SIGNED;
463 } else if (c >= 020 && c <= 023) {
464 ins->oprs[c - 020].offset = *data++;
465 } else if (c >= 024 && c <= 027) {
466 ins->oprs[c - 024].offset = *data++;
467 } else if (c >= 030 && c <= 033) {
468 ins->oprs[c - 030].offset = getu16(data);
469 data += 2;
470 } else if (c >= 034 && c <= 037) {
471 if (osize == 32) {
472 ins->oprs[c - 034].offset = getu32(data);
473 data += 4;
474 } else {
475 ins->oprs[c - 034].offset = getu16(data);
476 data += 2;
478 if (segsize != asize)
479 ins->oprs[c - 034].addr_size = asize;
480 } else if (c >= 040 && c <= 043) {
481 ins->oprs[c - 040].offset = getu32(data);
482 data += 4;
483 } else if (c >= 044 && c <= 047) {
484 switch (asize) {
485 case 16:
486 ins->oprs[c - 044].offset = getu16(data);
487 data += 2;
488 break;
489 case 32:
490 ins->oprs[c - 044].offset = getu32(data);
491 data += 4;
492 break;
493 case 64:
494 ins->oprs[c - 044].offset = getu64(data);
495 data += 8;
496 break;
498 if (segsize != asize)
499 ins->oprs[c - 044].addr_size = asize;
500 } else if (c >= 050 && c <= 053) {
501 ins->oprs[c - 050].offset = gets8(data++);
502 ins->oprs[c - 050].segment |= SEG_RELATIVE;
503 } else if (c >= 054 && c <= 057) {
504 ins->oprs[c - 054].offset = getu64(data);
505 data += 8;
506 } else if (c >= 060 && c <= 063) {
507 ins->oprs[c - 060].offset = gets16(data);
508 data += 2;
509 ins->oprs[c - 060].segment |= SEG_RELATIVE;
510 ins->oprs[c - 060].segment &= ~SEG_32BIT;
511 } else if (c >= 064 && c <= 067) {
512 if (osize == 16) {
513 ins->oprs[c - 064].offset = getu16(data);
514 data += 2;
515 ins->oprs[c - 064].segment &= ~(SEG_32BIT|SEG_64BIT);
516 } else if (osize == 32) {
517 ins->oprs[c - 064].offset = getu32(data);
518 data += 4;
519 ins->oprs[c - 064].segment &= ~SEG_64BIT;
520 ins->oprs[c - 064].segment |= SEG_32BIT;
522 if (segsize != osize) {
523 ins->oprs[c - 064].type =
524 (ins->oprs[c - 064].type & ~SIZE_MASK)
525 | ((osize == 16) ? BITS16 : BITS32);
527 } else if (c >= 070 && c <= 073) {
528 ins->oprs[c - 070].offset = getu32(data);
529 data += 4;
530 ins->oprs[c - 070].segment |= SEG_32BIT | SEG_RELATIVE;
531 } else if (c >= 0100 && c < 0140) {
532 int modrm = *data++;
533 ins->oprs[c & 07].segment |= SEG_RMREG;
534 data = do_ea(data, modrm, asize, segsize,
535 &ins->oprs[(c >> 3) & 07], ins);
536 if (!data)
537 return FALSE;
538 ins->oprs[c & 07].basereg = ((modrm >> 3)&7)+
539 (ins->rex & REX_R ? 8 : 0);
540 } else if (c >= 0140 && c <= 0143) {
541 ins->oprs[c - 0140].offset = getu16(data);
542 data += 2;
543 } else if (c >= 0150 && c <= 0153) {
544 ins->oprs[c - 0150].offset = getu32(data);
545 data += 4;
546 } else if (c >= 0160 && c <= 0167) {
547 ins->rex |= (c & 4) ? REX_D|REX_OC : REX_D;
548 ins->drexdst = c & 3;
549 } else if (c == 0170) {
550 if (*data++)
551 return FALSE;
552 } else if (c == 0171) {
553 data = do_drex(data, ins);
554 if (!data)
555 return FALSE;
556 } else if (c >= 0200 && c <= 0277) {
557 int modrm = *data++;
558 if (((modrm >> 3) & 07) != (c & 07))
559 return FALSE; /* spare field doesn't match up */
560 data = do_ea(data, modrm, asize, segsize,
561 &ins->oprs[(c >> 3) & 07], ins);
562 if (!data)
563 return FALSE;
564 } else if (c == 0310) {
565 if (asize != 16)
566 return FALSE;
567 else
568 a_used = TRUE;
569 } else if (c == 0311) {
570 if (asize == 16)
571 return FALSE;
572 else
573 a_used = TRUE;
574 } else if (c == 0312) {
575 if (asize != segsize)
576 return FALSE;
577 else
578 a_used = TRUE;
579 } else if (c == 0313) {
580 if (asize != 64)
581 return FALSE;
582 else
583 a_used = TRUE;
584 } else if (c == 0320) {
585 if (osize != 16)
586 return FALSE;
587 else
588 o_used = TRUE;
589 } else if (c == 0321) {
590 if (osize != 32)
591 return FALSE;
592 else
593 o_used = TRUE;
594 } else if (c == 0322) {
595 if (osize != (segsize == 16) ? 16 : 32)
596 return FALSE;
597 else
598 o_used = TRUE;
599 } else if (c == 0323) {
600 ins->rex |= REX_W; /* 64-bit only instruction */
601 osize = 64;
602 } else if (c == 0324) {
603 if (!(ins->rex & (REX_P|REX_W)) || osize != 64)
604 return FALSE;
605 } else if (c == 0330) {
606 int t = *r++, d = *data++;
607 if (d < t || d > t + 15)
608 return FALSE;
609 else
610 ins->condition = d - t;
611 } else if (c == 0331) {
612 if (prefix->rep)
613 return FALSE;
614 } else if (c == 0332) {
615 if (prefix->rep != 0xF2)
616 return FALSE;
617 } else if (c == 0333) {
618 if (prefix->rep != 0xF3)
619 return FALSE;
620 drep = 0;
621 } else if (c == 0334) {
622 if (lock) {
623 ins->rex |= REX_R;
624 lock = 0;
626 } else if (c == 0335) {
627 if (drep == P_REP)
628 drep = P_REPE;
629 } else if (c == 0364) {
630 if (prefix->osp)
631 return FALSE;
632 } else if (c == 0365) {
633 if (prefix->asp)
634 return FALSE;
635 } else if (c == 0366) {
636 if (!prefix->osp)
637 return FALSE;
638 o_used = TRUE;
639 } else if (c == 0367) {
640 if (!prefix->asp)
641 return FALSE;
642 o_used = TRUE;
646 /* REX cannot be combined with DREX */
647 if ((ins->rex & REX_D) && (prefix->rex))
648 return FALSE;
651 * Check for unused rep or a/o prefixes.
653 for (i = 0; i < t->operands; i++) {
654 if (ins->oprs[i].segment != SEG_RMREG)
655 a_used = TRUE;
658 ins->nprefix = 0;
659 if (lock)
660 ins->prefixes[ins->nprefix++] = P_LOCK;
661 if (drep)
662 ins->prefixes[ins->nprefix++] = drep;
663 if (!a_used && asize != segsize)
664 ins->prefixes[ins->nprefix++] = asize == 16 ? P_A16 : P_A32;
665 if (!o_used && osize == ((segsize == 16) ? 32 : 16))
666 ins->prefixes[ins->nprefix++] = osize == 16 ? P_O16 : P_O32;
668 /* Fix: check for redundant REX prefixes */
670 return data - origdata;
673 int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
674 int32_t offset, int autosync, uint32_t prefer)
676 const struct itemplate * const *p, * const *best_p;
677 const struct disasm_index *ix;
678 uint8_t *dp;
679 int length, best_length = 0;
680 char *segover;
681 int i, slen, colon, n;
682 uint8_t *origdata;
683 int works;
684 insn tmp_ins, ins;
685 uint32_t goodness, best;
686 int best_pref;
687 struct prefix_info prefix;
689 memset(&ins, 0, sizeof ins);
692 * Scan for prefixes.
694 memset(&prefix, 0, sizeof prefix);
695 prefix.asize = segsize;
696 prefix.osize = (segsize == 64) ? 32 : segsize;
697 segover = NULL;
698 origdata = data;
699 for (;;) {
700 if (*data == 0xF3 || *data == 0xF2)
701 prefix.rep = *data++;
702 else if (*data == 0xF0)
703 prefix.lock = *data++;
704 else if (*data == 0x2E)
705 segover = "cs", prefix.seg = *data++;
706 else if (*data == 0x36)
707 segover = "ss", prefix.seg = *data++;
708 else if (*data == 0x3E)
709 segover = "ds", prefix.seg = *data++;
710 else if (*data == 0x26)
711 segover = "es", prefix.seg = *data++;
712 else if (*data == 0x64)
713 segover = "fs", prefix.seg = *data++;
714 else if (*data == 0x65)
715 segover = "gs", prefix.seg = *data++;
716 else if (*data == 0x66) {
717 prefix.osize = (segsize == 16) ? 32 : 16;
718 prefix.osp = *data++;
719 } else if (*data == 0x67) {
720 prefix.asize = (segsize == 32) ? 16 : 32;
721 prefix.asp = *data++;
722 } else if (segsize == 64 && (*data & 0xf0) == REX_P) {
723 prefix.rex = *data++;
724 if (prefix.rex & REX_W)
725 prefix.osize = 64;
726 break; /* REX is always the last prefix */
727 } else {
728 break;
732 best = -1; /* Worst possible */
733 best_p = NULL;
734 best_pref = INT_MAX;
736 dp = data;
737 ix = itable + *dp++;
738 while (ix->n == (size_t)-1) {
739 ix = (const struct disasm_index *)ix->p + *dp++;
742 p = (const struct itemplate * const *)ix->p;
743 for (n = ix->n; n; n--, p++) {
744 if ((length = matches(*p, data, &prefix, segsize, &tmp_ins))) {
745 works = TRUE;
747 * Final check to make sure the types of r/m match up.
748 * XXX: Need to make sure this is actually correct.
750 for (i = 0; i < (*p)->operands; i++) {
751 if (!((*p)->opd[i] & SAME_AS) &&
753 /* If it's a mem-only EA but we have a register, die. */
754 ((tmp_ins.oprs[i].segment & SEG_RMREG) &&
755 !(MEMORY & ~(*p)->opd[i])) ||
756 /* If it's a reg-only EA but we have a memory ref, die. */
757 (!(tmp_ins.oprs[i].segment & SEG_RMREG) &&
758 !(REG_EA & ~(*p)->opd[i]) &&
759 !((*p)->opd[i] & REG_SMASK)) ||
760 /* Register type mismatch (eg FS vs REG_DESS): die. */
761 ((((*p)->opd[i] & (REGISTER | FPUREG)) ||
762 (tmp_ins.oprs[i].segment & SEG_RMREG)) &&
763 !whichreg((*p)->opd[i],
764 tmp_ins.oprs[i].basereg, tmp_ins.rex))
765 )) {
766 works = FALSE;
767 break;
772 * Note: we always prefer instructions which incorporate
773 * prefixes in the instructions themselves. This is to allow
774 * e.g. PAUSE to be preferred to REP NOP, and deal with
775 * MMX/SSE instructions where prefixes are used to select
776 * between MMX and SSE register sets or outright opcode
777 * selection.
779 if (works) {
780 goodness = ((*p)->flags & IF_PFMASK) ^ prefer;
781 if (tmp_ins.nprefix < best_pref ||
782 (tmp_ins.nprefix == best_pref && goodness < best)) {
783 /* This is the best one found so far */
784 best = goodness;
785 best_p = p;
786 best_pref = tmp_ins.nprefix;
787 best_length = length;
788 ins = tmp_ins;
794 if (!best_p)
795 return 0; /* no instruction was matched */
797 /* Pick the best match */
798 p = best_p;
799 length = best_length;
801 slen = 0;
803 /* TODO: snprintf returns the value that the string would have if
804 * the buffer were long enough, and not the actual length of
805 * the returned string, so each instance of using the return
806 * value of snprintf should actually be checked to assure that
807 * the return value is "sane." Maybe a macro wrapper could
808 * be used for that purpose.
810 for (i = 0; i < ins.nprefix; i++)
811 switch (ins.prefixes[i]) {
812 case P_LOCK:
813 slen += snprintf(output + slen, outbufsize - slen, "lock ");
814 break;
815 case P_REP:
816 slen += snprintf(output + slen, outbufsize - slen, "rep ");
817 break;
818 case P_REPE:
819 slen += snprintf(output + slen, outbufsize - slen, "repe ");
820 break;
821 case P_REPNE:
822 slen += snprintf(output + slen, outbufsize - slen, "repne ");
823 break;
824 case P_A16:
825 slen += snprintf(output + slen, outbufsize - slen, "a16 ");
826 break;
827 case P_A32:
828 slen += snprintf(output + slen, outbufsize - slen, "a32 ");
829 break;
830 case P_O16:
831 slen += snprintf(output + slen, outbufsize - slen, "o16 ");
832 break;
833 case P_O32:
834 slen += snprintf(output + slen, outbufsize - slen, "o32 ");
835 break;
836 default:
837 break;
840 for (i = 0; i < (int)elements(ico); i++)
841 if ((*p)->opcode == ico[i]) {
842 slen +=
843 snprintf(output + slen, outbufsize - slen, "%s%s", icn[i],
844 whichcond(ins.condition));
845 break;
847 if (i >= (int)elements(ico))
848 slen +=
849 snprintf(output + slen, outbufsize - slen, "%s",
850 insn_names[(*p)->opcode]);
851 colon = FALSE;
852 length += data - origdata; /* fix up for prefixes */
853 for (i = 0; i < (*p)->operands; i++) {
854 opflags_t t = (*p)->opd[i];
855 const operand *o = &ins.oprs[i];
856 int64_t offs;
858 if (t & SAME_AS) {
859 o = &ins.oprs[t & ~SAME_AS];
860 t = (*p)->opd[t & ~SAME_AS];
863 output[slen++] = (colon ? ':' : i == 0 ? ' ' : ',');
865 offs = o->offset;
866 if (o->segment & SEG_RELATIVE) {
867 offs += offset + length;
869 * sort out wraparound
871 if (!(o->segment & (SEG_32BIT|SEG_64BIT)))
872 offs &= 0xffff;
874 * add sync marker, if autosync is on
876 if (autosync)
877 add_sync(offs, 0L);
880 if (t & COLON)
881 colon = TRUE;
882 else
883 colon = FALSE;
885 if ((t & (REGISTER | FPUREG)) ||
886 (o->segment & SEG_RMREG)) {
887 enum reg_enum reg;
888 reg = whichreg(t, o->basereg, ins.rex);
889 if (t & TO)
890 slen += snprintf(output + slen, outbufsize - slen, "to ");
891 slen += snprintf(output + slen, outbufsize - slen, "%s",
892 reg_names[reg - EXPR_REG_START]);
893 } else if (!(UNITY & ~t)) {
894 output[slen++] = '1';
895 } else if (t & IMMEDIATE) {
896 if (t & BITS8) {
897 slen +=
898 snprintf(output + slen, outbufsize - slen, "byte ");
899 if (o->segment & SEG_SIGNED) {
900 if (offs < 0) {
901 offs *= -1;
902 output[slen++] = '-';
903 } else
904 output[slen++] = '+';
906 } else if (t & BITS16) {
907 slen +=
908 snprintf(output + slen, outbufsize - slen, "word ");
909 } else if (t & BITS32) {
910 slen +=
911 snprintf(output + slen, outbufsize - slen, "dword ");
912 } else if (t & BITS64) {
913 slen +=
914 snprintf(output + slen, outbufsize - slen, "qword ");
915 } else if (t & NEAR) {
916 slen +=
917 snprintf(output + slen, outbufsize - slen, "near ");
918 } else if (t & SHORT) {
919 slen +=
920 snprintf(output + slen, outbufsize - slen, "short ");
922 slen +=
923 snprintf(output + slen, outbufsize - slen, "0x%"PRIx64"",
924 offs);
925 } else if (!(MEM_OFFS & ~t)) {
926 slen +=
927 snprintf(output + slen, outbufsize - slen, "[%s%s%s0x%"PRIx64"]",
928 (segover ? segover : ""),
929 (segover ? ":" : ""),
930 (o->addr_size ==
931 32 ? "dword " : o->addr_size ==
932 16 ? "word " : ""), offs);
933 segover = NULL;
934 } else if (!(REGMEM & ~t)) {
935 int started = FALSE;
936 if (t & BITS8)
937 slen +=
938 snprintf(output + slen, outbufsize - slen, "byte ");
939 if (t & BITS16)
940 slen +=
941 snprintf(output + slen, outbufsize - slen, "word ");
942 if (t & BITS32)
943 slen +=
944 snprintf(output + slen, outbufsize - slen, "dword ");
945 if (t & BITS64)
946 slen +=
947 snprintf(output + slen, outbufsize - slen, "qword ");
948 if (t & BITS80)
949 slen +=
950 snprintf(output + slen, outbufsize - slen, "tword ");
951 if (t & FAR)
952 slen += snprintf(output + slen, outbufsize - slen, "far ");
953 if (t & NEAR)
954 slen +=
955 snprintf(output + slen, outbufsize - slen, "near ");
956 output[slen++] = '[';
957 if (o->addr_size)
958 slen += snprintf(output + slen, outbufsize - slen, "%s",
959 (o->addr_size == 64 ? "qword " :
960 o->addr_size == 32 ? "dword " :
961 o->addr_size == 16 ? "word " :
962 ""));
963 if (o->eaflags & EAF_REL)
964 slen += snprintf(output + slen, outbufsize - slen, "rel ");
965 if (segover) {
966 slen +=
967 snprintf(output + slen, outbufsize - slen, "%s:",
968 segover);
969 segover = NULL;
971 if (o->basereg != -1) {
972 slen += snprintf(output + slen, outbufsize - slen, "%s",
973 reg_names[(o->basereg -
974 EXPR_REG_START)]);
975 started = TRUE;
977 if (o->indexreg != -1) {
978 if (started)
979 output[slen++] = '+';
980 slen += snprintf(output + slen, outbufsize - slen, "%s",
981 reg_names[(o->indexreg -
982 EXPR_REG_START)]);
983 if (o->scale > 1)
984 slen +=
985 snprintf(output + slen, outbufsize - slen, "*%d",
986 o->scale);
987 started = TRUE;
989 if (o->segment & SEG_DISP8) {
990 int minus = 0;
991 int8_t offset = offs;
992 if (offset < 0) {
993 minus = 1;
994 offset = -offset;
996 slen +=
997 snprintf(output + slen, outbufsize - slen, "%s0x%"PRIx8"",
998 minus ? "-" : "+", offset);
999 } else if (o->segment & SEG_DISP16) {
1000 int minus = 0;
1001 int16_t offset = offs;
1002 if (offset < 0) {
1003 minus = 1;
1004 offset = -offset;
1006 slen +=
1007 snprintf(output + slen, outbufsize - slen, "%s0x%"PRIx16"",
1008 minus ? "-" : started ? "+" : "", offset);
1009 } else if (o->segment & SEG_DISP32) {
1010 char *prefix = "";
1011 int32_t offset = offs;
1012 if (offset < 0) {
1013 offset = -offset;
1014 prefix = "-";
1015 } else {
1016 prefix = started ? "+" : "";
1018 slen +=
1019 snprintf(output + slen, outbufsize - slen,
1020 "%s0x%"PRIx32"", prefix, offset);
1022 output[slen++] = ']';
1023 } else {
1024 slen +=
1025 snprintf(output + slen, outbufsize - slen, "<operand%d>",
1029 output[slen] = '\0';
1030 if (segover) { /* unused segment override */
1031 char *p = output;
1032 int count = slen + 1;
1033 while (count--)
1034 p[count + 3] = p[count];
1035 strncpy(output, segover, 2);
1036 output[2] = ' ';
1038 return length;
/*
 * Fallback for a byte that could not be disassembled: format it as a
 * `db' directive.
 *
 * data:       points at the byte to emit
 * output:     buffer that receives the NUL-terminated text
 * outbufsize: size of output in bytes; the text is truncated to fit
 *
 * Always returns 1, the number of bytes consumed.
 */
int32_t eatbyte(uint8_t *data, char *output, int outbufsize)
{
    snprintf(output, outbufsize, "db 0x%02X", *data);
    return 1;
}