1 /* disasm.c where all the _work_ gets done in the Netwide Disassembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * initial version 27/iii/95 by Simon Tatham
22 extern struct itemplate
**itable
[];
25 * Flags that go into the `segment' field of `insn' structures
28 #define SEG_RELATIVE 1
35 #define SEG_SIGNED 128
37 static int whichreg(int32_t regflags
, int regval
)
41 if (!(REG_AL
& ~regflags
))
43 if (!(REG_AX
& ~regflags
))
45 if (!(REG_EAX
& ~regflags
))
47 if (!(REG_DL
& ~regflags
))
49 if (!(REG_DX
& ~regflags
))
51 if (!(REG_EDX
& ~regflags
))
53 if (!(REG_CL
& ~regflags
))
55 if (!(REG_CX
& ~regflags
))
57 if (!(REG_ECX
& ~regflags
))
59 if (!(FPU0
& ~regflags
))
61 if (!(REG_CS
& ~regflags
))
62 return (regval
== 1) ? R_CS
: 0;
63 if (!(REG_DESS
& ~regflags
))
64 return (regval
== 0 || regval
== 2
65 || regval
== 3 ? sreg
[regval
] : 0);
66 if (!(REG_FSGS
& ~regflags
))
67 return (regval
== 4 || regval
== 5 ? sreg
[regval
] : 0);
68 if (!(REG_SEG67
& ~regflags
))
69 return (regval
== 6 || regval
== 7 ? sreg
[regval
] : 0);
71 /* All the entries below look up regval in an 8-entry array */
72 if (regval
< 0 || regval
> 7)
75 if (!((REGMEM
| BITS8
) & ~regflags
))
77 if (!((REGMEM
| BITS16
) & ~regflags
))
79 if (!((REGMEM
| BITS32
) & ~regflags
))
81 if (!(REG_SREG
& ~regflags
))
83 if (!(REG_CREG
& ~regflags
))
85 if (!(REG_DREG
& ~regflags
))
87 if (!(REG_TREG
& ~regflags
))
89 if (!(FPUREG
& ~regflags
))
90 return fpureg
[regval
];
91 if (!(MMXREG
& ~regflags
))
92 return mmxreg
[regval
];
93 if (!(XMMREG
& ~regflags
))
94 return xmmreg
[regval
];
99 static const char *whichcond(int condval
)
101 static int conds
[] = {
102 C_O
, C_NO
, C_C
, C_NC
, C_Z
, C_NZ
, C_NA
, C_A
,
103 C_S
, C_NS
, C_PE
, C_PO
, C_L
, C_NL
, C_NG
, C_G
105 return conditions
[conds
[condval
]];
109 * Process an effective address (ModRM) specification.
111 static uint8_t *do_ea(uint8_t *data
, int modrm
, int asize
,
112 int segsize
, operand
* op
)
114 int mod
, rm
, scale
, index
, base
;
116 mod
= (modrm
>> 6) & 03;
119 if (mod
== 3) { /* pure register version */
121 op
->segment
|= SEG_RMREG
;
129 * <mod> specifies the displacement size (none, byte or
130 * word), and <rm> specifies the register combination.
131 * Exception: mod=0,rm=6 does not specify [BP] as one might
132 * expect, but instead specifies [disp16].
134 op
->indexreg
= op
->basereg
= -1;
135 op
->scale
= 1; /* always, in 16 bits */
166 if (rm
== 6 && mod
== 0) { /* special case */
170 mod
= 2; /* fake disp16 */
174 op
->segment
|= SEG_NODISP
;
177 op
->segment
|= SEG_DISP8
;
178 op
->offset
= (char)*data
++;
181 op
->segment
|= SEG_DISP16
;
182 op
->offset
= *data
++;
183 op
->offset
|= ((unsigned)*data
++) << 8;
189 * Once again, <mod> specifies displacement size (this time
190 * none, byte or *dword*), while <rm> specifies the base
191 * register. Again, [EBP] is missing, replaced by a pure
192 * disp32 (this time that's mod=0,rm=*5*). However, rm=4
193 * indicates not a single base register, but instead the
194 * presence of a SIB byte...
220 if (rm
== 5 && mod
== 0) {
224 mod
= 2; /* fake disp32 */
226 if (rm
== 4) { /* process SIB */
227 scale
= (*data
>> 6) & 03;
228 index
= (*data
>> 3) & 07;
232 op
->scale
= 1 << scale
;
235 op
->indexreg
= R_EAX
;
238 op
->indexreg
= R_ECX
;
241 op
->indexreg
= R_EDX
;
244 op
->indexreg
= R_EBX
;
250 op
->indexreg
= R_EBP
;
253 op
->indexreg
= R_ESI
;
256 op
->indexreg
= R_EDI
;
293 op
->segment
|= SEG_NODISP
;
296 op
->segment
|= SEG_DISP8
;
297 op
->offset
= (char)*data
++;
300 op
->segment
|= SEG_DISP32
;
301 op
->offset
= *data
++;
302 op
->offset
|= ((unsigned)*data
++) << 8;
303 op
->offset
|= ((int32_t)*data
++) << 16;
304 op
->offset
|= ((int32_t)*data
++) << 24;
312 * Determine whether the instruction template in t corresponds to the data
313 * stream in data. Return the number of bytes matched if so.
315 static int matches(struct itemplate
*t
, uint8_t *data
, int asize
,
316 int osize
, int segsize
, int rep
, insn
* ins
)
318 uint8_t *r
= (uint8_t *)(t
->code
);
319 uint8_t *origdata
= data
;
320 int a_used
= FALSE
, o_used
= FALSE
;
325 else if (rep
== 0xF3)
330 if (c
>= 01 && c
<= 03) {
338 ins
->oprs
[0].basereg
= 0;
341 ins
->oprs
[0].basereg
= 2;
344 ins
->oprs
[0].basereg
= 3;
353 ins
->oprs
[0].basereg
= 4;
356 ins
->oprs
[0].basereg
= 5;
365 ins
->oprs
[0].basereg
= 0;
368 ins
->oprs
[0].basereg
= 1;
371 ins
->oprs
[0].basereg
= 2;
374 ins
->oprs
[0].basereg
= 3;
383 ins
->oprs
[0].basereg
= 4;
386 ins
->oprs
[0].basereg
= 5;
392 if (c
>= 010 && c
<= 012) {
393 int t
= *r
++, d
= *data
++;
394 if (d
< t
|| d
> t
+ 7)
397 ins
->oprs
[c
- 010].basereg
= d
- t
;
398 ins
->oprs
[c
- 010].segment
|= SEG_RMREG
;
404 if (c
>= 014 && c
<= 016) {
405 ins
->oprs
[c
- 014].offset
= (char)*data
++;
406 ins
->oprs
[c
- 014].segment
|= SEG_SIGNED
;
408 if (c
>= 020 && c
<= 022)
409 ins
->oprs
[c
- 020].offset
= *data
++;
410 if (c
>= 024 && c
<= 026)
411 ins
->oprs
[c
- 024].offset
= *data
++;
412 if (c
>= 030 && c
<= 032) {
413 ins
->oprs
[c
- 030].offset
= *data
++;
414 ins
->oprs
[c
- 030].offset
|= (((unsigned)*data
++) << 8);
416 if (c
>= 034 && c
<= 036) {
417 ins
->oprs
[c
- 034].offset
= *data
++;
418 ins
->oprs
[c
- 034].offset
|= (((unsigned)*data
++) << 8);
420 ins
->oprs
[c
- 034].offset
|= (((int32_t)*data
++) << 16);
421 ins
->oprs
[c
- 034].offset
|= (((int32_t)*data
++) << 24);
423 if (segsize
!= asize
)
424 ins
->oprs
[c
- 034].addr_size
= asize
;
426 if (c
>= 040 && c
<= 042) {
427 ins
->oprs
[c
- 040].offset
= *data
++;
428 ins
->oprs
[c
- 040].offset
|= (((unsigned)*data
++) << 8);
429 ins
->oprs
[c
- 040].offset
|= (((int32_t)*data
++) << 16);
430 ins
->oprs
[c
- 040].offset
|= (((int32_t)*data
++) << 24);
432 if (c
>= 044 && c
<= 046) {
433 ins
->oprs
[c
- 044].offset
= *data
++;
434 ins
->oprs
[c
- 044].offset
|= (((unsigned)*data
++) << 8);
436 ins
->oprs
[c
- 044].offset
|= (((int32_t)*data
++) << 16);
437 ins
->oprs
[c
- 044].offset
|= (((int32_t)*data
++) << 24);
439 if (segsize
!= asize
)
440 ins
->oprs
[c
- 044].addr_size
= asize
;
442 if (c
>= 050 && c
<= 052) {
443 ins
->oprs
[c
- 050].offset
= (char)*data
++;
444 ins
->oprs
[c
- 050].segment
|= SEG_RELATIVE
;
446 if (c
>= 060 && c
<= 062) {
447 ins
->oprs
[c
- 060].offset
= *data
++;
448 ins
->oprs
[c
- 060].offset
|= (((unsigned)*data
++) << 8);
449 ins
->oprs
[c
- 060].segment
|= SEG_RELATIVE
;
450 ins
->oprs
[c
- 060].segment
&= ~SEG_32BIT
;
452 if (c
>= 064 && c
<= 066) {
453 ins
->oprs
[c
- 064].offset
= *data
++;
454 ins
->oprs
[c
- 064].offset
|= (((unsigned)*data
++) << 8);
456 ins
->oprs
[c
- 064].offset
|= (((int32_t)*data
++) << 16);
457 ins
->oprs
[c
- 064].offset
|= (((int32_t)*data
++) << 24);
458 ins
->oprs
[c
- 064].segment
|= SEG_32BIT
;
460 ins
->oprs
[c
- 064].segment
&= ~SEG_32BIT
;
461 ins
->oprs
[c
- 064].segment
|= SEG_RELATIVE
;
462 if (segsize
!= osize
) {
463 ins
->oprs
[c
- 064].type
=
464 (ins
->oprs
[c
- 064].type
& NON_SIZE
)
465 | ((osize
== 16) ? BITS16
: BITS32
);
468 if (c
>= 070 && c
<= 072) {
469 ins
->oprs
[c
- 070].offset
= *data
++;
470 ins
->oprs
[c
- 070].offset
|= (((unsigned)*data
++) << 8);
471 ins
->oprs
[c
- 070].offset
|= (((int32_t)*data
++) << 16);
472 ins
->oprs
[c
- 070].offset
|= (((int32_t)*data
++) << 24);
473 ins
->oprs
[c
- 070].segment
|= SEG_32BIT
| SEG_RELATIVE
;
475 if (c
>= 0100 && c
< 0130) {
477 ins
->oprs
[c
& 07].basereg
= (modrm
>> 3) & 07;
478 ins
->oprs
[c
& 07].segment
|= SEG_RMREG
;
479 data
= do_ea(data
, modrm
, asize
, segsize
,
480 &ins
->oprs
[(c
>> 3) & 07]);
482 if (c
>= 0130 && c
<= 0132) {
483 ins
->oprs
[c
- 0130].offset
= *data
++;
484 ins
->oprs
[c
- 0130].offset
|= (((unsigned)*data
++) << 8);
486 if (c
>= 0140 && c
<= 0142) {
487 ins
->oprs
[c
- 0140].offset
= *data
++;
488 ins
->oprs
[c
- 0140].offset
|= (((unsigned)*data
++) << 8);
489 ins
->oprs
[c
- 0140].offset
|= (((int32_t)*data
++) << 16);
490 ins
->oprs
[c
- 0140].offset
|= (((int32_t)*data
++) << 24);
492 if (c
>= 0200 && c
<= 0277) {
494 if (((modrm
>> 3) & 07) != (c
& 07))
495 return FALSE
; /* spare field doesn't match up */
496 data
= do_ea(data
, modrm
, asize
, segsize
,
497 &ins
->oprs
[(c
>> 3) & 07]);
499 if (c
>= 0300 && c
<= 0302) {
501 ins
->oprs
[c
- 0300].segment
|= SEG_32BIT
;
503 ins
->oprs
[c
- 0300].segment
&= ~SEG_32BIT
;
519 if (asize
!= segsize
)
537 if (osize
!= segsize
)
543 int t
= *r
++, d
= *data
++;
544 if (d
< t
|| d
> t
+ 15)
547 ins
->condition
= d
- t
;
565 * Check for unused rep or a/o prefixes.
569 ins
->prefixes
[ins
->nprefix
++] = drep
;
570 if (!a_used
&& asize
!= segsize
)
571 ins
->prefixes
[ins
->nprefix
++] = (asize
== 16 ? P_A16
: P_A32
);
572 if (!o_used
&& osize
!= segsize
)
573 ins
->prefixes
[ins
->nprefix
++] = (osize
== 16 ? P_O16
: P_O32
);
575 return data
- origdata
;
578 int32_t disasm(uint8_t *data
, char *output
, int outbufsize
, int segsize
,
579 int32_t offset
, int autosync
, uint32_t prefer
)
581 struct itemplate
**p
, **best_p
;
582 int length
, best_length
= 0;
584 int rep
, lock
, asize
, osize
, i
, slen
, colon
;
588 uint32_t goodness
, best
;
593 asize
= osize
= segsize
;
598 if (*data
== 0xF3 || *data
== 0xF2)
600 else if (*data
== 0xF0)
602 else if (*data
== 0x2E || *data
== 0x36 || *data
== 0x3E ||
603 *data
== 0x26 || *data
== 0x64 || *data
== 0x65) {
624 } else if (*data
== 0x66)
625 osize
= 48 - segsize
, data
++;
626 else if (*data
== 0x67)
627 asize
= 48 - segsize
, data
++;
632 tmp_ins
.oprs
[0].segment
= tmp_ins
.oprs
[1].segment
=
633 tmp_ins
.oprs
[2].segment
=
634 tmp_ins
.oprs
[0].addr_size
= tmp_ins
.oprs
[1].addr_size
=
635 tmp_ins
.oprs
[2].addr_size
= (segsize
== 16 ? 0 : SEG_32BIT
);
636 tmp_ins
.condition
= -1;
637 best
= ~0UL; /* Worst possible */
639 for (p
= itable
[*data
]; *p
; p
++) {
640 if ((length
= matches(*p
, data
, asize
, osize
,
641 segsize
, rep
, &tmp_ins
))) {
644 * Final check to make sure the types of r/m match up.
646 for (i
= 0; i
< (*p
)->operands
; i
++) {
648 /* If it's a mem-only EA but we have a register, die. */
649 ((tmp_ins
.oprs
[i
].segment
& SEG_RMREG
) &&
650 !(MEMORY
& ~(*p
)->opd
[i
])) ||
651 /* If it's a reg-only EA but we have a memory ref, die. */
652 (!(tmp_ins
.oprs
[i
].segment
& SEG_RMREG
) &&
653 !(REGNORM
& ~(*p
)->opd
[i
]) &&
654 !((*p
)->opd
[i
] & REG_SMASK
)) ||
655 /* Register type mismatch (eg FS vs REG_DESS): die. */
656 ((((*p
)->opd
[i
] & (REGISTER
| FPUREG
)) ||
657 (tmp_ins
.oprs
[i
].segment
& SEG_RMREG
)) &&
658 !whichreg((*p
)->opd
[i
],
659 tmp_ins
.oprs
[i
].basereg
))) {
666 goodness
= ((*p
)->flags
& IF_PFMASK
) ^ prefer
;
667 if (goodness
< best
) {
668 /* This is the best one found so far */
671 best_length
= length
;
679 return 0; /* no instruction was matched */
681 /* Pick the best match */
683 length
= best_length
;
687 /* TODO: snprintf returns the value that the string would have if
688 * the buffer were long enough, and not the actual length of
689 * the returned string, so each instance of using the return
690 * value of snprintf should actually be checked to ensure that
691 * the return value is "sane." Maybe a macro wrapper could
692 * be used for that purpose.
695 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "lock ");
696 for (i
= 0; i
< ins
.nprefix
; i
++)
697 switch (ins
.prefixes
[i
]) {
699 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "rep ");
702 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "repe ");
705 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "repne ");
708 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "a16 ");
711 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "a32 ");
714 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "o16 ");
717 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "o32 ");
721 for (i
= 0; i
< elements(ico
); i
++)
722 if ((*p
)->opcode
== ico
[i
]) {
724 snprintf(output
+ slen
, outbufsize
- slen
, "%s%s", icn
[i
],
725 whichcond(ins
.condition
));
728 if (i
>= elements(ico
))
730 snprintf(output
+ slen
, outbufsize
- slen
, "%s",
731 insn_names
[(*p
)->opcode
]);
733 length
+= data
- origdata
; /* fix up for prefixes */
734 for (i
= 0; i
< (*p
)->operands
; i
++) {
735 output
[slen
++] = (colon
? ':' : i
== 0 ? ' ' : ',');
737 if (ins
.oprs
[i
].segment
& SEG_RELATIVE
) {
738 ins
.oprs
[i
].offset
+= offset
+ length
;
740 * sort out wraparound
742 if (!(ins
.oprs
[i
].segment
& SEG_32BIT
))
743 ins
.oprs
[i
].offset
&= 0xFFFF;
745 * add sync marker, if autosync is on
748 add_sync(ins
.oprs
[i
].offset
, 0L);
751 if ((*p
)->opd
[i
] & COLON
)
756 if (((*p
)->opd
[i
] & (REGISTER
| FPUREG
)) ||
757 (ins
.oprs
[i
].segment
& SEG_RMREG
)) {
758 ins
.oprs
[i
].basereg
= whichreg((*p
)->opd
[i
],
759 ins
.oprs
[i
].basereg
);
760 if ((*p
)->opd
[i
] & TO
)
761 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "to ");
762 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "%s",
763 reg_names
[ins
.oprs
[i
].basereg
-
765 } else if (!(UNITY
& ~(*p
)->opd
[i
])) {
766 output
[slen
++] = '1';
767 } else if ((*p
)->opd
[i
] & IMMEDIATE
) {
768 if ((*p
)->opd
[i
] & BITS8
) {
770 snprintf(output
+ slen
, outbufsize
- slen
, "byte ");
771 if (ins
.oprs
[i
].segment
& SEG_SIGNED
) {
772 if (ins
.oprs
[i
].offset
< 0) {
773 ins
.oprs
[i
].offset
*= -1;
774 output
[slen
++] = '-';
776 output
[slen
++] = '+';
778 } else if ((*p
)->opd
[i
] & BITS16
) {
780 snprintf(output
+ slen
, outbufsize
- slen
, "word ");
781 } else if ((*p
)->opd
[i
] & BITS32
) {
783 snprintf(output
+ slen
, outbufsize
- slen
, "dword ");
784 } else if ((*p
)->opd
[i
] & NEAR
) {
786 snprintf(output
+ slen
, outbufsize
- slen
, "near ");
787 } else if ((*p
)->opd
[i
] & SHORT
) {
789 snprintf(output
+ slen
, outbufsize
- slen
, "short ");
792 snprintf(output
+ slen
, outbufsize
- slen
, "0x%"PRIx64
"",
794 } else if (!(MEM_OFFS
& ~(*p
)->opd
[i
])) {
796 snprintf(output
+ slen
, outbufsize
- slen
, "[%s%s%s0x%"PRIx64
"]",
797 ((const char*)segover
? (const char*)segover
: ""), /* placate type mismatch warning */
798 ((const char*)segover
? ":" : ""), /* by using (const char*) instead of uint8_t* */
799 (ins
.oprs
[i
].addr_size
==
800 32 ? "dword " : ins
.oprs
[i
].addr_size
==
801 16 ? "word " : ""), ins
.oprs
[i
].offset
);
803 } else if (!(REGMEM
& ~(*p
)->opd
[i
])) {
805 if ((*p
)->opd
[i
] & BITS8
)
807 snprintf(output
+ slen
, outbufsize
- slen
, "byte ");
808 if ((*p
)->opd
[i
] & BITS16
)
810 snprintf(output
+ slen
, outbufsize
- slen
, "word ");
811 if ((*p
)->opd
[i
] & BITS32
)
813 snprintf(output
+ slen
, outbufsize
- slen
, "dword ");
814 if ((*p
)->opd
[i
] & BITS64
)
816 snprintf(output
+ slen
, outbufsize
- slen
, "qword ");
817 if ((*p
)->opd
[i
] & BITS80
)
819 snprintf(output
+ slen
, outbufsize
- slen
, "tword ");
820 if ((*p
)->opd
[i
] & FAR
)
821 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "far ");
822 if ((*p
)->opd
[i
] & NEAR
)
824 snprintf(output
+ slen
, outbufsize
- slen
, "near ");
825 output
[slen
++] = '[';
826 if (ins
.oprs
[i
].addr_size
)
827 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "%s",
828 (ins
.oprs
[i
].addr_size
== 32 ? "dword " :
829 ins
.oprs
[i
].addr_size
==
833 snprintf(output
+ slen
, outbufsize
- slen
, "%s:",
837 if (ins
.oprs
[i
].basereg
!= -1) {
838 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "%s",
839 reg_names
[(ins
.oprs
[i
].basereg
-
843 if (ins
.oprs
[i
].indexreg
!= -1) {
845 output
[slen
++] = '+';
846 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "%s",
847 reg_names
[(ins
.oprs
[i
].indexreg
-
849 if (ins
.oprs
[i
].scale
> 1)
851 snprintf(output
+ slen
, outbufsize
- slen
, "*%d",
855 if (ins
.oprs
[i
].segment
& SEG_DISP8
) {
857 if (ins
.oprs
[i
].offset
& 0x80) {
858 ins
.oprs
[i
].offset
= -(char)ins
.oprs
[i
].offset
;
862 snprintf(output
+ slen
, outbufsize
- slen
, "%c0x%"PRIx64
"",
863 sign
, ins
.oprs
[i
].offset
);
864 } else if (ins
.oprs
[i
].segment
& SEG_DISP16
) {
866 output
[slen
++] = '+';
868 snprintf(output
+ slen
, outbufsize
- slen
, "0x%"PRIx64
"",
870 } else if (ins
.oprs
[i
].segment
& SEG_DISP32
) {
872 output
[slen
++] = '+';
874 snprintf(output
+ slen
, outbufsize
- slen
, "0x%"PRIx64
"",
877 output
[slen
++] = ']';
880 snprintf(output
+ slen
, outbufsize
- slen
, "<operand%d>",
885 if (segover
) { /* unused segment override */
887 int count
= slen
+ 1;
889 p
[count
+ 3] = p
[count
];
890 strncpy(output
, segover
, 2);
896 int32_t eatbyte(uint8_t *data
, char *output
, int outbufsize
)
898 snprintf(output
, outbufsize
, "db 0x%02X", *data
);