1 /* ----------------------------------------------------------------------- *
3 * Copyright 1996-2012 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
 * conditions are met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
35 * disasm.c where all the _work_ gets done in the Netwide Disassembler
/*
 * Bounds-check helper for instruction fetches: execute _op (typically an
 * early return) when fewer than _need bytes remain between _ptr and the
 * end of the _size-byte buffer that starts at _start.
 * Wrapped in do { ... } while (0) so it expands to a single statement
 * and is safe in unbraced if/else bodies.
 */
#define fetch_safe(_start, _ptr, _size, _need, _op)                     \
    do {                                                                \
        if (((_ptr) - (_start)) >= ((_size) - (_need)))                 \
            _op;                                                        \
    } while (0)

/* Fetch guard used throughout the disassembler: bail out (return 0)
 * if the input stream is truncated. */
#define fetch_or_return(_start, _ptr, _size, _need)                     \
    fetch_safe(_start, _ptr, _size, _need, return 0)
/*
 * Flags that go into the `segment' field of `insn' structures
 * during disassembly.  These are distinct bits since they are OR-ed
 * together by the decoder (SEG_RELATIVE=1 and SEG_SIGNED=128 are the
 * anchors visible in the original; the intermediate flags are the ones
 * referenced by do_ea()/matches() below).
 */
#define SEG_RELATIVE      1
#define SEG_32BIT         2
#define SEG_RMREG         4
#define SEG_DISP8         8
#define SEG_DISP16       16
#define SEG_DISP32       32
#define SEG_NODISP       64
#define SEG_SIGNED      128
#define SEG_64BIT       256
/*
 * Prefix information collected while scanning an instruction stream;
 * filled in by disasm() and consulted by matches() to validate
 * instruction templates against the decoded prefixes.
 * (Declaration reconstructed; the vex_v field is restored because it is
 * read by the VEX/EVEX matching code later in this file.)
 */
struct prefix_info {
    uint8_t osize;              /* Operand size */
    uint8_t asize;              /* Address size */
    uint8_t osp;                /* Operand size prefix present */
    uint8_t asp;                /* Address size prefix present */
    uint8_t rep;                /* Rep prefix present */
    uint8_t seg;                /* Segment override prefix present */
    uint8_t wait;               /* WAIT "prefix" present */
    uint8_t lock;               /* Lock prefix present */
    uint8_t vex[3];             /* VEX prefix present */
    uint8_t vex_c;              /* VEX "class" (VEX, XOP, ...) */
    uint8_t vex_m;              /* VEX.M field */
    uint8_t vex_v;              /* VEX.V field */
    uint8_t vex_lp;             /* VEX.LP fields */
    uint32_t rex;               /* REX prefix present */
    uint8_t evex[3];            /* EVEX prefix present */
};
96 #define getu8(x) (*(uint8_t *)(x))
/* Fast path: direct (possibly unaligned) loads via pointer casts.
 * NOTE(review): these casts assume the platform tolerates unaligned,
 * type-punned access (strict-aliasing caveat); presumably this block is
 * guarded by a little-endian/unaligned-capable #if elsewhere, with the
 * byte-by-byte functions below as the portable fallback -- confirm
 * against the full file. */
98 /* Littleendian CPU which can handle unaligned references */
99 #define getu16(x) (*(uint16_t *)(x))
100 #define getu32(x) (*(uint32_t *)(x))
101 #define getu64(x) (*(uint64_t *)(x))
/*
 * Read a 16-bit little-endian value from a byte buffer, one byte at a
 * time.  Portable fallback for CPUs that cannot do unaligned or
 * type-punned loads; avoids strict-aliasing violations.
 */
static uint16_t getu16(uint8_t *data)
{
    return (uint16_t)data[0] + ((uint16_t)data[1] << 8);
}
/*
 * Read a 32-bit little-endian value from a byte buffer, one byte at a
 * time (portable fallback).  Byte loads are done inline rather than via
 * getu16() so each loader stands on its own; the result is identical.
 */
static uint32_t getu32(uint8_t *data)
{
    return (uint32_t)data[0] + ((uint32_t)data[1] << 8) +
           ((uint32_t)data[2] << 16) + ((uint32_t)data[3] << 24);
}
/*
 * Read a 64-bit little-endian value from a byte buffer, one byte at a
 * time (portable fallback).  Assembled most-significant byte first so
 * each step is a simple shift-and-or; the result is identical to the
 * original two-getu32 composition.
 */
static uint64_t getu64(uint8_t *data)
{
    uint64_t v = 0;
    int i;

    for (i = 7; i >= 0; i--)
        v = (v << 8) | data[i];
    return v;
}
/* Signed variants of the little-endian fetch helpers: same byte order,
 * value reinterpreted as two's-complement of the matching width. */
117 #define gets8(x) ((int8_t)getu8(x))
118 #define gets16(x) ((int16_t)getu16(x))
119 #define gets32(x) ((int32_t)getu32(x))
120 #define gets64(x) ((int64_t)getu64(x))
122 /* Important: regval must already have been adjusted for rex extensions */
123 static enum reg_enum
whichreg(opflags_t regflags
, int regval
, int rex
)
127 static const struct {
130 } specific_registers
[] = {
156 if (!(regflags
& (REGISTER
|REGMEM
)))
157 return 0; /* Registers not permissible?! */
159 regflags
|= REGISTER
;
161 for (i
= 0; i
< ARRAY_SIZE(specific_registers
); i
++)
162 if (!(specific_registers
[i
].flags
& ~regflags
))
163 return specific_registers
[i
].reg
;
165 /* All the entries below look up regval in an 16-entry array */
166 if (regval
< 0 || regval
> (rex
& REX_EV
? 31 : 15))
/*
 * Bounds-checked lookup in a register-decode table: yields 0 (no
 * register) when the index is out of range.  The index is cast to
 * size_t so a negative value also fails the bounds check.
 * Parameter names renamed from __array/__index: identifiers beginning
 * with a double underscore are reserved for the implementation
 * (C11 7.1.3).
 */
#define GET_REGISTER(reg_table_, reg_index_)                            \
    ((size_t)(reg_index_) < (size_t)ARRAY_SIZE(reg_table_) ?            \
     (reg_table_)[(reg_index_)] : 0)
172 if (!(REG8
& ~regflags
)) {
173 if (rex
& (REX_P
|REX_NH
))
174 return GET_REGISTER(nasm_rd_reg8_rex
, regval
);
176 return GET_REGISTER(nasm_rd_reg8
, regval
);
178 if (!(REG16
& ~regflags
))
179 return GET_REGISTER(nasm_rd_reg16
, regval
);
180 if (!(REG32
& ~regflags
))
181 return GET_REGISTER(nasm_rd_reg32
, regval
);
182 if (!(REG64
& ~regflags
))
183 return GET_REGISTER(nasm_rd_reg64
, regval
);
184 if (!(REG_SREG
& ~regflags
))
185 return GET_REGISTER(nasm_rd_sreg
, regval
& 7); /* Ignore REX */
186 if (!(REG_CREG
& ~regflags
))
187 return GET_REGISTER(nasm_rd_creg
, regval
);
188 if (!(REG_DREG
& ~regflags
))
189 return GET_REGISTER(nasm_rd_dreg
, regval
);
190 if (!(REG_TREG
& ~regflags
)) {
192 return 0; /* TR registers are ill-defined with rex */
193 return GET_REGISTER(nasm_rd_treg
, regval
);
195 if (!(FPUREG
& ~regflags
))
196 return GET_REGISTER(nasm_rd_fpureg
, regval
& 7); /* Ignore REX */
197 if (!(MMXREG
& ~regflags
))
198 return GET_REGISTER(nasm_rd_mmxreg
, regval
& 7); /* Ignore REX */
199 if (!(XMMREG
& ~regflags
))
200 return GET_REGISTER(nasm_rd_xmmreg
, regval
);
201 if (!(YMMREG
& ~regflags
))
202 return GET_REGISTER(nasm_rd_ymmreg
, regval
);
203 if (!(ZMMREG
& ~regflags
))
204 return GET_REGISTER(nasm_rd_zmmreg
, regval
);
205 if (!(OPMASKREG
& ~regflags
))
206 return GET_REGISTER(nasm_rd_opmaskreg
, regval
);
207 if (!(BNDREG
& ~regflags
))
208 return GET_REGISTER(nasm_rd_bndreg
, regval
);
214 static uint32_t append_evex_reg_deco(char *buf
, uint32_t num
,
215 decoflags_t deco
, uint8_t *evex
)
217 const char * const er_names
[] = {"rn-sae", "rd-sae", "ru-sae", "rz-sae"};
218 uint32_t num_chars
= 0;
220 if ((deco
& MASK
) && (evex
[2] & EVEX_P2AAA
)) {
221 enum reg_enum opmasknum
= nasm_rd_opmaskreg
[evex
[2] & EVEX_P2AAA
];
222 const char * regname
= nasm_reg_names
[opmasknum
- EXPR_REG_START
];
224 num_chars
+= snprintf(buf
+ num_chars
, num
- num_chars
,
227 if ((deco
& Z
) && (evex
[2] & EVEX_P2Z
)) {
228 num_chars
+= snprintf(buf
+ num_chars
, num
- num_chars
,
233 if (evex
[2] & EVEX_P2B
) {
235 uint8_t er_type
= (evex
[2] & EVEX_P2LL
) >> 5;
236 num_chars
+= snprintf(buf
+ num_chars
, num
- num_chars
,
237 ",{%s}", er_names
[er_type
]);
238 } else if (deco
& SAE
) {
239 num_chars
+= snprintf(buf
+ num_chars
, num
- num_chars
,
247 static uint32_t append_evex_mem_deco(char *buf
, uint32_t num
, opflags_t type
,
248 decoflags_t deco
, uint8_t *evex
)
250 uint32_t num_chars
= 0;
252 if ((evex
[2] & EVEX_P2B
) && (deco
& BRDCAST_MASK
)) {
253 decoflags_t deco_brsize
= deco
& BRSIZE_MASK
;
254 opflags_t template_opsize
= (deco_brsize
== BR_BITS32
? BITS32
: BITS64
);
255 uint8_t br_num
= (type
& SIZE_MASK
) / BITS128
*
256 BITS64
/ template_opsize
* 2;
258 num_chars
+= snprintf(buf
+ num_chars
, num
- num_chars
,
262 if ((deco
& MASK
) && (evex
[2] & EVEX_P2AAA
)) {
263 enum reg_enum opmasknum
= nasm_rd_opmaskreg
[evex
[2] & EVEX_P2AAA
];
264 const char * regname
= nasm_reg_names
[opmasknum
- EXPR_REG_START
];
266 num_chars
+= snprintf(buf
+ num_chars
, num
- num_chars
,
269 if ((deco
& Z
) && (evex
[2] & EVEX_P2Z
)) {
270 num_chars
+= snprintf(buf
+ num_chars
, num
- num_chars
,
280 * Process an effective address (ModRM) specification.
282 static uint8_t *do_ea(uint8_t *data
, int modrm
, int asize
,
283 int segsize
, enum ea_type type
,
284 operand
*op
, insn
*ins
)
286 int mod
, rm
, scale
, index
, base
;
290 bool is_evex
= !!(ins
->rex
& REX_EV
);
292 mod
= (modrm
>> 6) & 03;
295 if (mod
!= 3 && asize
!= 16 && rm
== 4)
301 if (mod
== 3) { /* pure register version */
302 op
->basereg
= rm
+(rex
& REX_B
? 8 : 0);
303 op
->segment
|= SEG_RMREG
;
304 if (is_evex
&& segsize
== 64) {
305 op
->basereg
+= (evex
[0] & EVEX_P0X
? 0 : 16);
315 * <mod> specifies the displacement size (none, byte or
316 * word), and <rm> specifies the register combination.
317 * Exception: mod=0,rm=6 does not specify [BP] as one might
318 * expect, but instead specifies [disp16].
321 if (type
!= EA_SCALAR
)
324 op
->indexreg
= op
->basereg
= -1;
325 op
->scale
= 1; /* always, in 16 bits */
356 if (rm
== 6 && mod
== 0) { /* special case */
360 mod
= 2; /* fake disp16 */
364 op
->segment
|= SEG_NODISP
;
367 op
->segment
|= SEG_DISP8
;
368 if (ins
->evex_tuple
!= 0) {
369 op
->offset
= gets8(data
) * get_disp8N(ins
);
371 op
->offset
= gets8(data
);
376 op
->segment
|= SEG_DISP16
;
377 op
->offset
= *data
++;
378 op
->offset
|= ((unsigned)*data
++) << 8;
384 * Once again, <mod> specifies displacement size (this time
385 * none, byte or *dword*), while <rm> specifies the base
386 * register. Again, [EBP] is missing, replaced by a pure
387 * disp32 (this time that's mod=0,rm=*5*) in 32-bit mode,
388 * and RIP-relative addressing in 64-bit mode.
391 * indicates not a single base register, but instead the
392 * presence of a SIB byte...
394 int a64
= asize
== 64;
399 op
->basereg
= nasm_rd_reg64
[rm
| ((rex
& REX_B
) ? 8 : 0)];
401 op
->basereg
= nasm_rd_reg32
[rm
| ((rex
& REX_B
) ? 8 : 0)];
403 if (rm
== 5 && mod
== 0) {
405 op
->eaflags
|= EAF_REL
;
406 op
->segment
|= SEG_RELATIVE
;
410 op
->disp_size
= asize
;
413 mod
= 2; /* fake disp32 */
417 if (rm
== 4) { /* process SIB */
419 scale
= (sib
>> 6) & 03;
420 index
= (sib
>> 3) & 07;
423 op
->scale
= 1 << scale
;
426 vsib_hi
= (rex
& REX_X
? 8 : 0) |
427 (evex
[2] & EVEX_P2VP
? 0 : 16);
430 if (type
== EA_XMMVSIB
)
431 op
->indexreg
= nasm_rd_xmmreg
[index
| vsib_hi
];
432 else if (type
== EA_YMMVSIB
)
433 op
->indexreg
= nasm_rd_ymmreg
[index
| vsib_hi
];
434 else if (type
== EA_ZMMVSIB
)
435 op
->indexreg
= nasm_rd_zmmreg
[index
| vsib_hi
];
436 else if (index
== 4 && !(rex
& REX_X
))
437 op
->indexreg
= -1; /* ESP/RSP cannot be an index */
439 op
->indexreg
= nasm_rd_reg64
[index
| ((rex
& REX_X
) ? 8 : 0)];
441 op
->indexreg
= nasm_rd_reg32
[index
| ((rex
& REX_X
) ? 8 : 0)];
443 if (base
== 5 && mod
== 0) {
445 mod
= 2; /* Fake disp32 */
447 op
->basereg
= nasm_rd_reg64
[base
| ((rex
& REX_B
) ? 8 : 0)];
449 op
->basereg
= nasm_rd_reg32
[base
| ((rex
& REX_B
) ? 8 : 0)];
453 } else if (type
!= EA_SCALAR
) {
454 /* Can't have VSIB without SIB */
460 op
->segment
|= SEG_NODISP
;
463 op
->segment
|= SEG_DISP8
;
464 if (ins
->evex_tuple
!= 0) {
465 op
->offset
= gets8(data
) * get_disp8N(ins
);
467 op
->offset
= gets8(data
);
472 op
->segment
|= SEG_DISP32
;
473 op
->offset
= gets32(data
);
482 * Determine whether the instruction template in t corresponds to the data
483 * stream in data. Return the number of bytes matched if so.
/* Expand to four consecutive case labels: (x) through (x)+3, used by the
 * template matcher for opcode bytes that encode an operand in their low
 * bits. */
485 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
487 static int matches(const struct itemplate
*t
, uint8_t *data
,
488 const struct prefix_info
*prefix
, int segsize
, insn
*ins
)
490 uint8_t *r
= (uint8_t *)(t
->code
);
491 uint8_t *origdata
= data
;
492 bool a_used
= false, o_used
= false;
493 enum prefixes drep
= 0;
494 enum prefixes dwait
= 0;
495 uint8_t lock
= prefix
->lock
;
496 int osize
= prefix
->osize
;
497 int asize
= prefix
->asize
;
500 struct operand
*opx
, *opy
;
503 int regmask
= (segsize
== 64) ? 15 : 7;
504 enum ea_type eat
= EA_SCALAR
;
506 for (i
= 0; i
< MAX_OPERANDS
; i
++) {
507 ins
->oprs
[i
].segment
= ins
->oprs
[i
].disp_size
=
508 (segsize
== 64 ? SEG_64BIT
: segsize
== 32 ? SEG_32BIT
: 0);
512 ins
->rex
= prefix
->rex
;
513 memset(ins
->prefixes
, 0, sizeof ins
->prefixes
);
515 if (itemp_has(t
, (segsize
== 64 ? IF_NOLONG
: IF_LONG
)))
518 if (prefix
->rep
== 0xF2)
519 drep
= (itemp_has(t
, IF_BND
) ? P_BND
: P_REPNE
);
520 else if (prefix
->rep
== 0xF3)
523 dwait
= prefix
->wait
? P_WAIT
: 0;
525 while ((c
= *r
++) != 0) {
526 op1
= (c
& 3) + ((opex
& 1) << 2);
527 op2
= ((c
>> 3) & 3) + ((opex
& 2) << 1);
528 opx
= &ins
->oprs
[op1
];
529 opy
= &ins
->oprs
[op2
];
550 int t
= *r
++, d
= *data
++;
551 if (d
< t
|| d
> t
+ 7)
554 opx
->basereg
= (d
-t
)+
555 (ins
->rex
& REX_B
? 8 : 0);
556 opx
->segment
|= SEG_RMREG
;
562 /* this is an separate index reg position of MIB operand (ICC) */
563 /* Disassembler uses NASM's split EA form only */
567 opx
->offset
= (int8_t)*data
++;
568 opx
->segment
|= SEG_SIGNED
;
572 opx
->offset
= *data
++;
576 opx
->offset
= *data
++;
580 opx
->offset
= getu16(data
);
586 opx
->offset
= getu32(data
);
589 opx
->offset
= getu16(data
);
592 if (segsize
!= asize
)
593 opx
->disp_size
= asize
;
597 opx
->offset
= getu32(data
);
602 opx
->offset
= gets32(data
);
609 opx
->offset
= getu16(data
);
615 opx
->offset
= getu32(data
);
621 opx
->offset
= getu64(data
);
629 opx
->offset
= gets8(data
++);
630 opx
->segment
|= SEG_RELATIVE
;
634 opx
->offset
= getu64(data
);
639 opx
->offset
= gets16(data
);
641 opx
->segment
|= SEG_RELATIVE
;
642 opx
->segment
&= ~SEG_32BIT
;
645 case4(064): /* rel */
646 opx
->segment
|= SEG_RELATIVE
;
647 /* In long mode rel is always 32 bits, sign extended. */
648 if (segsize
== 64 || osize
== 32) {
649 opx
->offset
= gets32(data
);
652 opx
->segment
|= SEG_32BIT
;
653 opx
->type
= (opx
->type
& ~SIZE_MASK
)
654 | (segsize
== 64 ? BITS64
: BITS32
);
656 opx
->offset
= gets16(data
);
658 opx
->segment
&= ~SEG_32BIT
;
659 opx
->type
= (opx
->type
& ~SIZE_MASK
) | BITS16
;
664 opx
->offset
= gets32(data
);
666 opx
->segment
|= SEG_32BIT
| SEG_RELATIVE
;
675 opx
->segment
|= SEG_RMREG
;
676 data
= do_ea(data
, modrm
, asize
, segsize
, eat
, opy
, ins
);
679 opx
->basereg
= ((modrm
>> 3) & 7) + (ins
->rex
& REX_R
? 8 : 0);
680 if ((ins
->rex
& REX_EV
) && (segsize
== 64))
681 opx
->basereg
+= (ins
->evex_p
[0] & EVEX_P0RP
? 0 : 16);
687 uint8_t ximm
= *data
++;
689 ins
->oprs
[c
>> 3].basereg
= (ximm
>> 4) & regmask
;
690 ins
->oprs
[c
>> 3].segment
|= SEG_RMREG
;
691 ins
->oprs
[c
& 7].offset
= ximm
& 15;
697 uint8_t ximm
= *data
++;
703 ins
->oprs
[c
>> 4].basereg
= (ximm
>> 4) & regmask
;
704 ins
->oprs
[c
>> 4].segment
|= SEG_RMREG
;
710 uint8_t ximm
= *data
++;
712 opx
->basereg
= (ximm
>> 4) & regmask
;
713 opx
->segment
|= SEG_RMREG
;
727 if (((modrm
>> 3) & 07) != (c
& 07))
728 return 0; /* spare field doesn't match up */
729 data
= do_ea(data
, modrm
, asize
, segsize
, eat
, opy
, ins
);
738 uint8_t evexm
= *r
++;
739 uint8_t evexwlp
= *r
++;
740 uint8_t modrm
, valid_mask
;
741 ins
->evex_tuple
= *r
++ - 0300;
742 modrm
= *(origdata
+ 1);
745 if ((prefix
->rex
& (REX_EV
|REX_V
|REX_P
)) != REX_EV
)
748 if ((evexm
& 0x1f) != prefix
->vex_m
)
751 switch (evexwlp
& 060) {
753 if (prefix
->rex
& REX_W
)
757 if (!(prefix
->rex
& REX_W
))
761 case 040: /* VEX.W is a don't care */
768 /* If EVEX.b is set with reg-reg op,
769 * EVEX.L'L contains embedded rounding control info
771 if ((prefix
->evex
[2] & EVEX_P2B
) && ((modrm
>> 6) == 3)) {
772 valid_mask
= 0x3; /* prefix only */
774 valid_mask
= 0xf; /* vector length and prefix */
776 if ((evexwlp
^ prefix
->vex_lp
) & valid_mask
)
780 if ((prefix
->vex_v
!= 0) ||
781 (!(prefix
->evex
[2] & EVEX_P2VP
) &&
782 ((eat
< EA_XMMVSIB
) || (eat
> EA_ZMMVSIB
))))
785 opx
->segment
|= SEG_RMREG
;
786 opx
->basereg
= ((~prefix
->evex
[2] & EVEX_P2VP
) << (4 - 3) ) |
790 memcpy(ins
->evex_p
, prefix
->evex
, 3);
801 if ((prefix
->rex
& (REX_V
|REX_P
)) != REX_V
)
804 if ((vexm
& 0x1f) != prefix
->vex_m
)
807 switch (vexwlp
& 060) {
809 if (prefix
->rex
& REX_W
)
813 if (!(prefix
->rex
& REX_W
))
817 case 040: /* VEX.W is a don't care */
824 /* The 010 bit of vexwlp is set if VEX.L is ignored */
825 if ((vexwlp
^ prefix
->vex_lp
) & ((vexwlp
& 010) ? 03 : 07))
829 if (prefix
->vex_v
!= 0)
832 opx
->segment
|= SEG_RMREG
;
833 opx
->basereg
= prefix
->vex_v
;
840 if (prefix
->rep
== 0xF3)
845 if (prefix
->rep
== 0xF2)
847 else if (prefix
->rep
== 0xF3)
852 if (prefix
->lock
== 0xF0) {
853 if (prefix
->rep
== 0xF2)
855 else if (prefix
->rep
== 0xF3)
875 if (asize
!= segsize
)
889 if (prefix
->rex
& REX_B
)
894 if (prefix
->rex
& REX_X
)
899 if (prefix
->rex
& REX_R
)
904 if (prefix
->rex
& REX_W
)
923 if (osize
!= (segsize
== 16 ? 16 : 32))
930 ins
->rex
|= REX_W
; /* 64-bit only instruction */
947 int t
= *r
++, d
= *data
++;
948 if (d
< t
|| d
> t
+ 15)
951 ins
->condition
= d
- t
;
956 if (prefix
->rep
== 0xF3)
966 if (prefix
->rep
!= 0xF2)
972 if (prefix
->rep
!= 0xF3)
997 if (prefix
->wait
!= 0x9B)
1003 if (prefix
->osp
|| prefix
->rep
)
1008 if (!prefix
->osp
|| prefix
->rep
)
1052 return 0; /* Unknown code */
1056 if (!vex_ok
&& (ins
->rex
& (REX_V
| REX_EV
)))
1059 /* REX cannot be combined with VEX */
1060 if ((ins
->rex
& REX_V
) && (prefix
->rex
& REX_P
))
1064 * Check for unused rep or a/o prefixes.
1066 for (i
= 0; i
< t
->operands
; i
++) {
1067 if (ins
->oprs
[i
].segment
!= SEG_RMREG
)
1072 if (ins
->prefixes
[PPS_LOCK
])
1074 ins
->prefixes
[PPS_LOCK
] = P_LOCK
;
1077 if (ins
->prefixes
[PPS_REP
])
1079 ins
->prefixes
[PPS_REP
] = drep
;
1081 ins
->prefixes
[PPS_WAIT
] = dwait
;
1083 if (osize
!= ((segsize
== 16) ? 16 : 32)) {
1084 enum prefixes pfx
= 0;
1098 if (ins
->prefixes
[PPS_OSIZE
])
1100 ins
->prefixes
[PPS_OSIZE
] = pfx
;
1103 if (!a_used
&& asize
!= segsize
) {
1104 if (ins
->prefixes
[PPS_ASIZE
])
1106 ins
->prefixes
[PPS_ASIZE
] = asize
== 16 ? P_A16
: P_A32
;
1109 /* Fix: check for redundant REX prefixes */
1111 return data
- origdata
;
1114 /* Condition names for disassembly, sorted by x86 code */
/*
 * Condition-code names for disassembly, indexed by the 4-bit x86
 * condition field (0 = overflow ... 15 = greater).
 */
static const char * const condition_name[16] = {
    "o", "no", "c", "nc", "z", "nz", "na", "a",
    "s", "ns", "pe", "po", "l", "nl", "ng", "g"
};
1120 int32_t disasm(uint8_t *data
, int32_t data_size
, char *output
, int outbufsize
, int segsize
,
1121 int64_t offset
, int autosync
, iflag_t
*prefer
)
1123 const struct itemplate
* const *p
, * const *best_p
;
1124 const struct disasm_index
*ix
;
1126 int length
, best_length
= 0;
1128 int i
, slen
, colon
, n
;
1132 iflag_t goodness
, best
;
1134 struct prefix_info prefix
;
1138 memset(&ins
, 0, sizeof ins
);
1141 * Scan for prefixes.
1143 memset(&prefix
, 0, sizeof prefix
);
1144 prefix
.asize
= segsize
;
1145 prefix
.osize
= (segsize
== 64) ? 32 : segsize
;
1152 while (!end_prefix
) {
1156 fetch_or_return(origdata
, data
, data_size
, 1);
1157 prefix
.rep
= *data
++;
1161 fetch_or_return(origdata
, data
, data_size
, 1);
1162 prefix
.wait
= *data
++;
1166 fetch_or_return(origdata
, data
, data_size
, 1);
1167 prefix
.lock
= *data
++;
1171 fetch_or_return(origdata
, data
, data_size
, 1);
1172 segover
= "cs", prefix
.seg
= *data
++;
1175 fetch_or_return(origdata
, data
, data_size
, 1);
1176 segover
= "ss", prefix
.seg
= *data
++;
1179 fetch_or_return(origdata
, data
, data_size
, 1);
1180 segover
= "ds", prefix
.seg
= *data
++;
1183 fetch_or_return(origdata
, data
, data_size
, 1);
1184 segover
= "es", prefix
.seg
= *data
++;
1187 fetch_or_return(origdata
, data
, data_size
, 1);
1188 segover
= "fs", prefix
.seg
= *data
++;
1191 fetch_or_return(origdata
, data
, data_size
, 1);
1192 segover
= "gs", prefix
.seg
= *data
++;
1196 fetch_or_return(origdata
, data
, data_size
, 1);
1197 prefix
.osize
= (segsize
== 16) ? 32 : 16;
1198 prefix
.osp
= *data
++;
1201 fetch_or_return(origdata
, data
, data_size
, 1);
1202 prefix
.asize
= (segsize
== 32) ? 16 : 32;
1203 prefix
.asp
= *data
++;
1208 if (segsize
== 64 || (data
[1] & 0xc0) == 0xc0) {
1209 fetch_or_return(origdata
, data
, data_size
, 2);
1210 prefix
.vex
[0] = *data
++;
1211 prefix
.vex
[1] = *data
++;
1214 prefix
.vex_c
= RV_VEX
;
1216 if (prefix
.vex
[0] == 0xc4) {
1217 fetch_or_return(origdata
, data
, data_size
, 1);
1218 prefix
.vex
[2] = *data
++;
1219 prefix
.rex
|= (~prefix
.vex
[1] >> 5) & 7; /* REX_RXB */
1220 prefix
.rex
|= (prefix
.vex
[2] >> (7-3)) & REX_W
;
1221 prefix
.vex_m
= prefix
.vex
[1] & 0x1f;
1222 prefix
.vex_v
= (~prefix
.vex
[2] >> 3) & 15;
1223 prefix
.vex_lp
= prefix
.vex
[2] & 7;
1225 prefix
.rex
|= (~prefix
.vex
[1] >> (7-2)) & REX_R
;
1227 prefix
.vex_v
= (~prefix
.vex
[1] >> 3) & 15;
1228 prefix
.vex_lp
= prefix
.vex
[1] & 7;
1231 ix
= itable_vex
[RV_VEX
][prefix
.vex_m
][prefix
.vex_lp
& 3];
1238 if (segsize
== 64 || ((data
[1] & 0xc0) == 0xc0)) {
1239 fetch_or_return(origdata
, data
, data_size
, 4);
1240 data
++; /* 62h EVEX prefix */
1241 prefix
.evex
[0] = *data
++;
1242 prefix
.evex
[1] = *data
++;
1243 prefix
.evex
[2] = *data
++;
1245 prefix
.rex
= REX_EV
;
1246 prefix
.vex_c
= RV_EVEX
;
1247 prefix
.rex
|= (~prefix
.evex
[0] >> 5) & 7; /* REX_RXB */
1248 prefix
.rex
|= (prefix
.evex
[1] >> (7-3)) & REX_W
;
1249 prefix
.vex_m
= prefix
.evex
[0] & EVEX_P0MM
;
1250 prefix
.vex_v
= (~prefix
.evex
[1] & EVEX_P1VVVV
) >> 3;
1251 prefix
.vex_lp
= ((prefix
.evex
[2] & EVEX_P2LL
) >> (5-2)) |
1252 (prefix
.evex
[1] & EVEX_P1PP
);
1254 ix
= itable_vex
[prefix
.vex_c
][prefix
.vex_m
][prefix
.vex_lp
& 3];
1261 if ((data
[1] & 030) != 0 &&
1262 (segsize
== 64 || (data
[1] & 0xc0) == 0xc0)) {
1263 fetch_or_return(origdata
, data
, data_size
, 3);
1264 prefix
.vex
[0] = *data
++;
1265 prefix
.vex
[1] = *data
++;
1266 prefix
.vex
[2] = *data
++;
1269 prefix
.vex_c
= RV_XOP
;
1271 prefix
.rex
|= (~prefix
.vex
[1] >> 5) & 7; /* REX_RXB */
1272 prefix
.rex
|= (prefix
.vex
[2] >> (7-3)) & REX_W
;
1273 prefix
.vex_m
= prefix
.vex
[1] & 0x1f;
1274 prefix
.vex_v
= (~prefix
.vex
[2] >> 3) & 15;
1275 prefix
.vex_lp
= prefix
.vex
[2] & 7;
1277 ix
= itable_vex
[RV_XOP
][prefix
.vex_m
][prefix
.vex_lp
& 3];
1298 if (segsize
== 64) {
1299 fetch_or_return(origdata
, data
, data_size
, 1);
1300 prefix
.rex
= *data
++;
1301 if (prefix
.rex
& REX_W
)
1313 iflag_set_all(&best
); /* Worst possible */
1315 best_pref
= INT_MAX
;
1318 return 0; /* No instruction table at all... */
1321 fetch_or_return(origdata
, dp
, data_size
, 1);
1323 while (ix
->n
== -1) {
1324 fetch_or_return(origdata
, dp
, data_size
, 1);
1325 ix
= (const struct disasm_index
*)ix
->p
+ *dp
++;
1328 p
= (const struct itemplate
* const *)ix
->p
;
1329 for (n
= ix
->n
; n
; n
--, p
++) {
1330 if ((length
= matches(*p
, data
, &prefix
, segsize
, &tmp_ins
))) {
1333 * Final check to make sure the types of r/m match up.
1334 * XXX: Need to make sure this is actually correct.
1336 for (i
= 0; i
< (*p
)->operands
; i
++) {
1338 /* If it's a mem-only EA but we have a
1340 ((tmp_ins
.oprs
[i
].segment
& SEG_RMREG
) &&
1341 is_class(MEMORY
, (*p
)->opd
[i
])) ||
1342 /* If it's a reg-only EA but we have a memory
1344 (!(tmp_ins
.oprs
[i
].segment
& SEG_RMREG
) &&
1345 !(REG_EA
& ~(*p
)->opd
[i
]) &&
1346 !((*p
)->opd
[i
] & REG_SMASK
)) ||
1347 /* Register type mismatch (eg FS vs REG_DESS):
1349 ((((*p
)->opd
[i
] & (REGISTER
| FPUREG
)) ||
1350 (tmp_ins
.oprs
[i
].segment
& SEG_RMREG
)) &&
1351 !whichreg((*p
)->opd
[i
],
1352 tmp_ins
.oprs
[i
].basereg
, tmp_ins
.rex
))
1360 * Note: we always prefer instructions which incorporate
1361 * prefixes in the instructions themselves. This is to allow
1362 * e.g. PAUSE to be preferred to REP NOP, and deal with
1363 * MMX/SSE instructions where prefixes are used to select
1364 * between MMX and SSE register sets or outright opcode
1369 goodness
= iflag_pfmask(*p
);
1370 goodness
= iflag_xor(&goodness
, prefer
);
1372 for (i
= 0; i
< MAXPREFIX
; i
++)
1373 if (tmp_ins
.prefixes
[i
])
1375 if (nprefix
< best_pref
||
1376 (nprefix
== best_pref
&&
1377 iflag_cmp(&goodness
, &best
) < 0)) {
1378 /* This is the best one found so far */
1381 best_pref
= nprefix
;
1382 best_length
= length
;
1390 return 0; /* no instruction was matched */
1392 /* Pick the best match */
1394 length
= best_length
;
1398 /* TODO: snprintf returns the value that the string would have if
1399 * the buffer were long enough, and not the actual length of
1400 * the returned string, so each instance of using the return
1401 * value of snprintf should actually be checked to assure that
1402 * the return value is "sane." Maybe a macro wrapper could
1403 * be used for that purpose.
1405 for (i
= 0; i
< MAXPREFIX
; i
++) {
1406 const char *prefix
= prefix_name(ins
.prefixes
[i
]);
1408 slen
+= snprintf(output
+slen
, outbufsize
-slen
, "%s ", prefix
);
1412 if (i
>= FIRST_COND_OPCODE
)
1413 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "%s%s",
1414 nasm_insn_names
[i
], condition_name
[ins
.condition
]);
1416 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "%s",
1417 nasm_insn_names
[i
]);
1420 is_evex
= !!(ins
.rex
& REX_EV
);
1421 length
+= data
- origdata
; /* fix up for prefixes */
1422 for (i
= 0; i
< (*p
)->operands
; i
++) {
1423 opflags_t t
= (*p
)->opd
[i
];
1424 decoflags_t deco
= (*p
)->deco
[i
];
1425 const operand
*o
= &ins
.oprs
[i
];
1428 output
[slen
++] = (colon
? ':' : i
== 0 ? ' ' : ',');
1431 if (o
->segment
& SEG_RELATIVE
) {
1432 offs
+= offset
+ length
;
1434 * sort out wraparound
1436 if (!(o
->segment
& (SEG_32BIT
|SEG_64BIT
)))
1438 else if (segsize
!= 64)
1442 * add sync marker, if autosync is on
1453 if ((t
& (REGISTER
| FPUREG
)) ||
1454 (o
->segment
& SEG_RMREG
)) {
1456 reg
= whichreg(t
, o
->basereg
, ins
.rex
);
1458 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "to ");
1459 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "%s",
1460 nasm_reg_names
[reg
-EXPR_REG_START
]);
1461 if (t
& REGSET_MASK
)
1462 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "+%d",
1463 (int)((t
& REGSET_MASK
) >> (REGSET_SHIFT
-1))-1);
1464 if (is_evex
&& deco
)
1465 slen
+= append_evex_reg_deco(output
+ slen
, outbufsize
- slen
,
1467 } else if (!(UNITY
& ~t
)) {
1468 output
[slen
++] = '1';
1469 } else if (t
& IMMEDIATE
) {
1472 snprintf(output
+ slen
, outbufsize
- slen
, "byte ");
1473 if (o
->segment
& SEG_SIGNED
) {
1476 output
[slen
++] = '-';
1478 output
[slen
++] = '+';
1480 } else if (t
& BITS16
) {
1482 snprintf(output
+ slen
, outbufsize
- slen
, "word ");
1483 } else if (t
& BITS32
) {
1485 snprintf(output
+ slen
, outbufsize
- slen
, "dword ");
1486 } else if (t
& BITS64
) {
1488 snprintf(output
+ slen
, outbufsize
- slen
, "qword ");
1489 } else if (t
& NEAR
) {
1491 snprintf(output
+ slen
, outbufsize
- slen
, "near ");
1492 } else if (t
& SHORT
) {
1494 snprintf(output
+ slen
, outbufsize
- slen
, "short ");
1497 snprintf(output
+ slen
, outbufsize
- slen
, "0x%"PRIx64
"",
1499 } else if (!(MEM_OFFS
& ~t
)) {
1501 snprintf(output
+ slen
, outbufsize
- slen
,
1502 "[%s%s%s0x%"PRIx64
"]",
1503 (segover
? segover
: ""),
1504 (segover
? ":" : ""),
1505 (o
->disp_size
== 64 ? "qword " :
1506 o
->disp_size
== 32 ? "dword " :
1507 o
->disp_size
== 16 ? "word " : ""), offs
);
1509 } else if (is_class(REGMEM
, t
)) {
1510 int started
= false;
1513 snprintf(output
+ slen
, outbufsize
- slen
, "byte ");
1516 snprintf(output
+ slen
, outbufsize
- slen
, "word ");
1519 snprintf(output
+ slen
, outbufsize
- slen
, "dword ");
1522 snprintf(output
+ slen
, outbufsize
- slen
, "qword ");
1525 snprintf(output
+ slen
, outbufsize
- slen
, "tword ");
1526 if ((ins
.evex_p
[2] & EVEX_P2B
) && (deco
& BRDCAST_MASK
)) {
1527 /* when broadcasting, each element size should be used */
1528 if (deco
& BR_BITS32
)
1530 snprintf(output
+ slen
, outbufsize
- slen
, "dword ");
1531 else if (deco
& BR_BITS64
)
1533 snprintf(output
+ slen
, outbufsize
- slen
, "qword ");
1537 snprintf(output
+ slen
, outbufsize
- slen
, "oword ");
1540 snprintf(output
+ slen
, outbufsize
- slen
, "yword ");
1543 snprintf(output
+ slen
, outbufsize
- slen
, "zword ");
1546 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "far ");
1549 snprintf(output
+ slen
, outbufsize
- slen
, "near ");
1550 output
[slen
++] = '[';
1552 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "%s",
1553 (o
->disp_size
== 64 ? "qword " :
1554 o
->disp_size
== 32 ? "dword " :
1555 o
->disp_size
== 16 ? "word " :
1557 if (o
->eaflags
& EAF_REL
)
1558 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "rel ");
1561 snprintf(output
+ slen
, outbufsize
- slen
, "%s:",
1565 if (o
->basereg
!= -1) {
1566 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "%s",
1567 nasm_reg_names
[(o
->basereg
-EXPR_REG_START
)]);
1570 if (o
->indexreg
!= -1 && !itemp_has(*best_p
, IF_MIB
)) {
1572 output
[slen
++] = '+';
1573 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "%s",
1574 nasm_reg_names
[(o
->indexreg
-EXPR_REG_START
)]);
1577 snprintf(output
+ slen
, outbufsize
- slen
, "*%d",
1583 if (o
->segment
& SEG_DISP8
) {
1586 uint32_t offset
= offs
;
1587 if ((int32_t)offset
< 0) {
1594 snprintf(output
+ slen
, outbufsize
- slen
, "%s0x%"PRIx32
"",
1598 uint8_t offset
= offs
;
1599 if ((int8_t)offset
< 0) {
1606 snprintf(output
+ slen
, outbufsize
- slen
, "%s0x%"PRIx8
"",
1609 } else if (o
->segment
& SEG_DISP16
) {
1611 uint16_t offset
= offs
;
1612 if ((int16_t)offset
< 0 && started
) {
1616 prefix
= started
? "+" : "";
1619 snprintf(output
+ slen
, outbufsize
- slen
,
1620 "%s0x%"PRIx16
"", prefix
, offset
);
1621 } else if (o
->segment
& SEG_DISP32
) {
1622 if (prefix
.asize
== 64) {
1624 uint64_t offset
= offs
;
1625 if ((int32_t)offs
< 0 && started
) {
1629 prefix
= started
? "+" : "";
1632 snprintf(output
+ slen
, outbufsize
- slen
,
1633 "%s0x%"PRIx64
"", prefix
, offset
);
1636 uint32_t offset
= offs
;
1637 if ((int32_t) offset
< 0 && started
) {
1641 prefix
= started
? "+" : "";
1644 snprintf(output
+ slen
, outbufsize
- slen
,
1645 "%s0x%"PRIx32
"", prefix
, offset
);
1649 if (o
->indexreg
!= -1 && itemp_has(*best_p
, IF_MIB
)) {
1650 output
[slen
++] = ',';
1651 slen
+= snprintf(output
+ slen
, outbufsize
- slen
, "%s",
1652 nasm_reg_names
[(o
->indexreg
-EXPR_REG_START
)]);
1655 snprintf(output
+ slen
, outbufsize
- slen
, "*%d",
1660 output
[slen
++] = ']';
1662 if (is_evex
&& deco
)
1663 slen
+= append_evex_mem_deco(output
+ slen
, outbufsize
- slen
,
1664 t
, deco
, ins
.evex_p
);
1667 snprintf(output
+ slen
, outbufsize
- slen
, "<operand%d>",
1671 output
[slen
] = '\0';
1672 if (segover
) { /* unused segment override */
1674 int count
= slen
+ 1;
1676 p
[count
+ 3] = p
[count
];
1677 strncpy(output
, segover
, 2);
1684 * This is called when we don't have a complete instruction. If it
1685 * is a standalone *single-byte* prefix show it as such, otherwise
1686 * print it as a literal.
1688 int32_t eatbyte(uint8_t *data
, char *output
, int outbufsize
, int segsize
)
1690 uint8_t byte
= *data
;
1691 const char *str
= NULL
;
1725 str
= (segsize
== 16) ? "o32" : "o16";
1728 str
= (segsize
== 32) ? "a16" : "a32";
1746 if (segsize
== 64) {
1747 snprintf(output
, outbufsize
, "rex%s%s%s%s%s",
1748 (byte
== REX_P
) ? "" : ".",
1749 (byte
& REX_W
) ? "w" : "",
1750 (byte
& REX_R
) ? "r" : "",
1751 (byte
& REX_X
) ? "x" : "",
1752 (byte
& REX_B
) ? "b" : "");
1755 /* else fall through */
1757 snprintf(output
, outbufsize
, "db 0x%02x", byte
);
1762 snprintf(output
, outbufsize
, "%s", str
);