1 /* Copyright (c) 2007, Google Inc.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * Implementation of MiniDisassembler.
35 #include "mini_disassembler.h"
// Two-argument constructor: copies the caller-supplied operand-size and
// address-size defaults into operand_default_is_32_bits_ and
// address_default_is_32_bits_ through the member initializer list.
// NOTE(review): no statements or closing brace for the constructor body are
// visible in this text -- confirm the body against the original file.
39 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits
,
40 bool address_default_is_32_bits
)
41 : operand_default_is_32_bits_(operand_default_is_32_bits
),
42 address_default_is_32_bits_(address_default_is_32_bits
) {
// Default constructor: initializes both operand_default_is_32_bits_ and
// address_default_is_32_bits_ to true.
// NOTE(review): no statements or closing brace for the constructor body are
// visible in this text -- confirm the body against the original file.
46 MiniDisassembler::MiniDisassembler()
47 : operand_default_is_32_bits_(true),
48 address_default_is_32_bits_(true) {
// Decodes the single instruction that begins at start_byte.
//
// Steps visible here: ProcessPrefixes() is run on the first byte and an
// IT_UNKNOWN result is returned immediately; ProcessOpcode() is then called
// with table index 0; finally operand_bytes_ plus the distance from
// start_byte to current_byte is added to instruction_bytes.
//
// start_byte:        first byte of the instruction to decode.
// instruction_bytes: in/out; the computed length is added ('+=') to whatever
//                    value the caller passed in.
// Returns IT_UNKNOWN when prefix processing reports it; on the success path
// visible here it returns the member instruction_type_ set by
// ProcessOpcode().
//
// NOTE(review): the statements that advance current_byte by size, and the
// body of the error branch below, are not visible in this text -- confirm
// against the original file.
52 InstructionType
MiniDisassembler::Disassemble(
53 unsigned char* start_byte
,
54 unsigned int& instruction_bytes
) {
55 // Clean up any state from previous invocations.
58 // Start by processing any prefixes.
59 unsigned char* current_byte
= start_byte
;
60 unsigned int size
= 0;
61 InstructionType instruction_type
= ProcessPrefixes(current_byte
, size
);
// IT_UNKNOWN from prefix processing aborts decoding of this instruction.
63 if (IT_UNKNOWN
== instruction_type
)
64 return instruction_type
;
69 // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
70 // and address_is_32_bits_ flags are correctly set.
// Table index 0 is the same table that ProcessPrefixes() consults.
72 instruction_type
= ProcessOpcode(current_byte
, 0, size
);
74 // Check for error processing instruction
// Note: this tests the member instruction_type_, which ProcessOpcode()
// writes, rather than the local that received ProcessOpcode()'s return value.
75 if ((IT_UNKNOWN
== instruction_type_
) || (IT_UNUSED
== instruction_type_
)) {
81 // Invariant: operand_bytes_ indicates the total size of operands
82 // specified by the opcode and/or ModR/M byte and/or SIB byte.
83 // current_byte points to the first byte after the ModR/M byte, or after
84 // the SIB byte if it is present (i.e. the first byte of any operands
85 // encoded in the instruction).
87 // We get the total length of any prefixes, the opcode, and the ModR/M and
88 // SIB bytes if present, by taking the difference of the original starting
89 // address and the current byte (which points to the first byte of the
90 // operands if present, or to the first byte of the next instruction if
91 // they are not). Adding the count of bytes in the operands encoded in
92 // the instruction gives us the full length of the instruction in bytes.
// '+=' accumulates into the caller's variable instead of overwriting it, so
// the final value depends on what the caller passed in.
93 instruction_bytes
+= operand_bytes_
+ (current_byte
- start_byte
);
95 // Return the instruction type, which was set by ProcessOpcode().
96 return instruction_type_
;
// Resets the per-instruction decoding state: the effective operand-size and
// address-size flags are restored from their configured defaults, the
// ModR/M decode flag and the F2/F3/66 prefix flags are cleared, and
// instruction_type_ is set to IT_UNKNOWN.
// NOTE(review): operand_bytes_ is only ever incremented elsewhere in the
// visible code and no reset of it appears here -- confirm against the
// original file whether a reset belongs in this function.
99 void MiniDisassembler::Initialize() {
100 operand_is_32_bits_
= operand_default_is_32_bits_
;
101 address_is_32_bits_
= address_default_is_32_bits_
;
104 should_decode_modrm_
= false;
105 instruction_type_
= IT_UNKNOWN
;
106 got_f2_prefix_
= false;
107 got_f3_prefix_
= false;
108 got_66_prefix_
= false;
// Examines the byte at start_byte and, if it is a prefix, records its effect
// and recurses on the following byte.
//
// The byte is looked up in opcode table 0 and its type_ selects the action:
//   - IT_PREFIX_ADDRESS: the effective address size becomes the opposite of
//     the configured default.
//   - IT_PREFIX_OPERAND: the effective operand size becomes the opposite of
//     the configured default.
// For prefix bytes the F2/F3/66 flags are set according to the byte value
// and the returned type is the table entry's type_.
//
// start_byte: byte to examine.
// size:       in/out counter, forwarded to the recursive call.
// Returns IT_GENERIC when no prefix case applies (the initial value),
// otherwise the type_ of the first byte examined; the value returned by the
// recursive call is discarded.
//
// NOTE(review): the target label of the goto, the increment of size that
// the comment before the recursive call refers to, and the remaining case
// labels are not visible in this text -- confirm against the original file.
111 InstructionType
MiniDisassembler::ProcessPrefixes(unsigned char* start_byte
,
112 unsigned int& size
) {
113 InstructionType instruction_type
= IT_GENERIC
;
114 const Opcode
& opcode
= s_ia32_opcode_map_
[0].table_
[*start_byte
];
116 switch (opcode
.type_
) {
117 case IT_PREFIX_ADDRESS
:
118 address_is_32_bits_
= !address_default_is_32_bits_
;
// Skip the operand-size toggle that belongs to the next case.
119 goto nochangeoperand
;
120 case IT_PREFIX_OPERAND
:
121 operand_is_32_bits_
= !operand_default_is_32_bits_
;
// Remember which of the F2 / F3 / 66 prefixes was seen; ProcessOpcode()
// consults these flags for prefix-dependent opcodes.
125 if (0xF2 == (*start_byte
))
126 got_f2_prefix_
= true;
127 else if (0xF3 == (*start_byte
))
128 got_f3_prefix_
= true;
129 else if (0x66 == (*start_byte
))
130 got_66_prefix_
= true;
132 instruction_type
= opcode
.type_
;
134 // we got a prefix, so add one and check next byte
// The recursive call's return value is intentionally or accidentally unused;
// only its side effects on the member flags and on size are kept.
135 ProcessPrefixes(start_byte
+ 1, size
);
137 break; // not a prefix byte
140 return instruction_type
;
// Looks up the opcode byte at start_byte in opcode table table_index and
// records the resulting instruction type in instruction_type_.
//
// The byte is shifted right by the table's shift_ and masked with its mask_
// to form the table index. An index outside [min_lim_, max_lim_] yields
// IT_UNKNOWN; an IT_UNUSED entry yields IT_UNUSED; an IT_REFERENCE entry
// continues the lookup recursively at start_byte + 1 in the table named by
// the entry's table_index_. Otherwise the (possibly prefix-specific) opcode
// description is selected, its three operand flags are passed to
// ProcessOperand(), and ProcessModrm() is invoked as needed.
//
// start_byte:  byte to decode.
// table_index: index into s_ia32_opcode_map_.
// size:        in/out counter forwarded to the recursive call and to
//              ProcessModrm().
// Returns instruction_type_ on the early-exit paths visible here.
//
// NOTE(review): the conditions that choose between the ProcessModrm() calls
// at the end, the increments of size, and the final return statements are
// not visible in this text -- confirm against the original file.
143 InstructionType
MiniDisassembler::ProcessOpcode(unsigned char* start_byte
,
144 unsigned int table_index
,
145 unsigned int& size
) {
146 const OpcodeTable
& table
= s_ia32_opcode_map_
[table_index
]; // Get our table
147 unsigned char current_byte
= (*start_byte
) >> table
.shift_
;
148 current_byte
= current_byte
& table
.mask_
; // Mask out the bits we will use
150 // Check whether the byte we have is inside the table we have.
151 if (current_byte
< table
.min_lim_
|| current_byte
> table
.max_lim_
) {
152 instruction_type_
= IT_UNKNOWN
;
153 return instruction_type_
;
156 const Opcode
& opcode
= table
.table_
[current_byte
];
157 if (IT_UNUSED
== opcode
.type_
) {
158 // This instruction is not used by the IA-32 ISA, so we indicate
159 // this to the user. Probably means that we were pointed to
160 // a byte in memory that was not the start of an instruction.
161 instruction_type_
= IT_UNUSED
;
162 return instruction_type_
;
163 } else if (IT_REFERENCE
== opcode
.type_
) {
164 // We are looking at an opcode that has more bytes (or is continued
165 // in the ModR/M byte). Recursively find the opcode definition in
166 // the table for the opcode's next byte.
// The recursive call's return value is ignored; its result is read back
// from the member instruction_type_ on the next statement.
168 ProcessOpcode(start_byte
+ 1, opcode
.table_index_
, size
);
169 return instruction_type_
;
// Start from the table entry itself viewed as a SpecificOpcode.
// NOTE(review): the C-style cast presumes an Opcode can be read through a
// SpecificOpcode pointer -- confirm against the declarations in the header.
172 const SpecificOpcode
* specific_opcode
= (SpecificOpcode
*)&opcode
;
// For prefix-dependent opcodes, prefer the F2 variant, then F3, then 66; a
// variant is used only when its mnemonic_ is non-zero.
173 if (opcode
.is_prefix_dependent_
) {
174 if (got_f2_prefix_
&& opcode
.opcode_if_f2_prefix_
.mnemonic_
!= 0) {
175 specific_opcode
= &opcode
.opcode_if_f2_prefix_
;
176 } else if (got_f3_prefix_
&& opcode
.opcode_if_f3_prefix_
.mnemonic_
!= 0) {
177 specific_opcode
= &opcode
.opcode_if_f3_prefix_
;
178 } else if (got_66_prefix_
&& opcode
.opcode_if_66_prefix_
.mnemonic_
!= 0) {
179 specific_opcode
= &opcode
.opcode_if_66_prefix_
;
183 // Inv: The opcode type is known.
184 instruction_type_
= specific_opcode
->type_
;
186 // Let's process the operand types to see if we have any immediate
187 // operands, and/or a ModR/M byte.
// The bool results of ProcessOperand() are not inspected here.
189 ProcessOperand(specific_opcode
->flag_dest_
);
190 ProcessOperand(specific_opcode
->flag_source_
);
191 ProcessOperand(specific_opcode
->flag_aux_
);
193 // Inv: We have processed the opcode and incremented operand_bytes_
194 // by the number of bytes of any operands specified by the opcode
195 // that are stored in the instruction (not registers etc.). Now
196 // we need to return the total number of bytes for the opcode and
197 // for the ModR/M or SIB bytes if they are present.
// A mask other than 0xff means this table was indexed by only some bits of
// the byte at start_byte rather than by the whole byte.
199 if (table
.mask_
!= 0xff) {
201 // we're looking at a ModR/M byte so we're not going to
202 // count that into the opcode size
203 ProcessModrm(start_byte
, size
);
206 // need to count the ModR/M byte even if it's just being
207 // used for opcode extension
213 // The ModR/M byte is the next byte.
215 ProcessModrm(start_byte
+ 1, size
);
// Updates the decoding state for one operand descriptor.
//
// flag_operand is split into an addressing mode (flag_operand & AM_MASK) and
// an operand type (flag_operand & OT_MASK). Addressing modes AM_E, AM_M,
// AM_Q and AM_W request ModR/M decoding by setting should_decode_modrm_.
// Addressing modes AM_I, AM_J and AM_O carry an immediate or offset in the
// instruction, so operand_bytes_ is increased by the size implied by the
// operand type; for OT_C, OT_P and OT_V that size depends on
// operand_is_32_bits_.
//
// flag_operand: combined addressing-mode / operand-type flags, or
//               AM_NOT_USED for an absent operand.
// Returns a bool that starts out true.
//
// NOTE(review): the break / else / return statements, one case label in the
// operand-type switch, and any statements for the "ModR/M present but not
// decoded" group are not visible in this text -- confirm against the
// original file before relying on the exact control flow.
224 bool MiniDisassembler::ProcessOperand(int flag_operand
) {
225 bool succeeded
= true;
226 if (AM_NOT_USED
== flag_operand
)
229 // Decide what to do based on the addressing mode.
230 switch (flag_operand
& AM_MASK
) {
231 // No ModR/M byte indicated by these addressing modes, and no
232 // additional (e.g. immediate) parameters.
233 case AM_A
: // Direct address
234 case AM_F
: // EFLAGS register
235 case AM_X
: // Memory addressed by the DS:SI register pair
236 case AM_Y
: // Memory addressed by the ES:DI register pair
237 case AM_IMPLICIT
: // Parameter is implicit, occupies no space in the instruction
241 // There is a ModR/M byte but it does not necessarily need to be decoded.
243 case AM_C
: // reg field of ModR/M selects a control register
244 case AM_D
: // reg field of ModR/M selects a debug register
245 case AM_G
: // reg field of ModR/M selects a general register
246 case AM_P
: // reg field of ModR/M selects an MMX register
247 case AM_R
: // mod field of ModR/M may refer only to a general register
248 case AM_S
: // reg field of ModR/M selects a segment register
249 case AM_T
: // reg field of ModR/M selects a test register
250 case AM_V
: // reg field of ModR/M selects a 128-bit XMM register
254 // In these addressing modes, there is a ModR/M byte and it needs to be
255 // decoded. No other (e.g. immediate) params than indicated in ModR/M.
256 case AM_E
: // Operand is either a general-purpose register or memory,
257 // specified by ModR/M byte
258 case AM_M
: // ModR/M byte will refer only to memory
259 case AM_Q
: // Operand is either an MMX register or memory (complex
260 // evaluation), specified by ModR/M byte
261 case AM_W
: // Operand is either a 128-bit XMM register or memory (complex
262 // eval), specified by ModR/M byte
// ProcessModrm() checks this flag to decide whether to inspect the byte.
264 should_decode_modrm_
= true;
267 // These addressing modes specify an immediate or an offset value
268 // directly, so we need to look at the operand type to see how many bytes.
270 case AM_I
: // Immediate data.
271 case AM_J
: // Jump to offset.
272 case AM_O
: // Operand is at offset.
273 switch (flag_operand
& OT_MASK
) {
274 case OT_B
: // Byte regardless of operand-size attribute.
275 operand_bytes_
+= OS_BYTE
;
277 case OT_C
: // Byte or word, depending on operand-size attribute.
278 if (operand_is_32_bits_
)
279 operand_bytes_
+= OS_WORD
;
281 operand_bytes_
+= OS_BYTE
;
283 case OT_D
: // Doubleword, regardless of operand-size attribute.
284 operand_bytes_
+= OS_DOUBLE_WORD
;
286 case OT_DQ
: // Double-quadword, regardless of operand-size attribute.
287 operand_bytes_
+= OS_DOUBLE_QUAD_WORD
;
289 case OT_P
: // 32-bit or 48-bit pointer, depending on operand-size attribute.
291 if (operand_is_32_bits_
)
292 operand_bytes_
+= OS_48_BIT_POINTER
;
294 operand_bytes_
+= OS_32_BIT_POINTER
;
296 case OT_PS
: // 128-bit packed single-precision floating-point data.
297 operand_bytes_
+= OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING
;
299 case OT_Q
: // Quadword, regardless of operand-size attribute.
300 operand_bytes_
+= OS_QUAD_WORD
;
302 case OT_S
: // 6-byte pseudo-descriptor.
303 operand_bytes_
+= OS_PSEUDO_DESCRIPTOR
;
305 case OT_SD
: // Scalar Double-Precision Floating-Point Value
306 case OT_PD
: // Unaligned packed double-precision floating point value
307 operand_bytes_
+= OS_DOUBLE_PRECISION_FLOATING
;
// NOTE(review): the case label and statements that the next two comment
// lines belong to are not visible in this text.
310 // Scalar element of a 128-bit packed single-precision
312 // We simply return enItUnknown since we don't have to support
316 case OT_V
: // Word or doubleword, depending on operand-size attribute.
317 if (operand_is_32_bits_
)
318 operand_bytes_
+= OS_DOUBLE_WORD
;
320 operand_bytes_
+= OS_WORD
;
322 case OT_W
: // Word, regardless of operand-size attribute.
323 operand_bytes_
+= OS_WORD
;
326 // Can safely ignore these.
327 case OT_A
: // Two one-word operands in memory or two double-word
328 // operands in memory
329 case OT_PI
: // Quadword MMX technology register (e.g. mm0)
330 case OT_SI
: // Doubleword integer register (e.g., eax)
// Processes the ModR/M byte at start_byte.
//
// When should_decode_modrm_ is false the byte's contents are not examined.
// Otherwise the mod field (bits 7..6) and the r/m field (bits 2..0) are
// packed into a 5-bit index (mod in bits 4..3, r/m in bits 2..0, so 0..31)
// that selects an entry from s_ia32_modrm_map_ or s_ia16_modrm_map_. If that
// entry says operand bytes are encoded in the instruction, operand_bytes_
// grows by the entry's operand_size_; if it says a SIB byte follows,
// ProcessSib() is called on the next byte and its result is returned.
//
// start_byte: address of the ModR/M byte.
// size:       in/out counter, forwarded to ProcessSib().
//
// NOTE(review): the body of the "no decode" branch, the else that selects
// the 16-bit table, the increments of size and the non-SIB return are not
// visible in this text -- confirm against the original file.
345 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte
,
346 unsigned int& size
) {
347 // If we don't need to decode, we just return the size of the ModR/M
348 // byte (there is never a SIB byte in this case).
349 if (!should_decode_modrm_
) {
354 // We never care about the reg field, only the combination of the mod
355 // and r/m fields, so let's start by packing those fields together into
// a single value (kept in modrm below).
357 unsigned char modrm
= (*start_byte
);
358 unsigned char mod
= modrm
& 0xC0; // mask out top two bits to get mod field
359 modrm
= modrm
& 0x07; // mask out bottom 3 bits to get r/m field
360 mod
= mod
>> 3; // shift the mod field to the right place
361 modrm
= mod
| modrm
; // combine the r/m and mod fields as discussed
362 mod
= mod
>> 3; // shift the mod field to bits 2..0
364 // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
365 // in bits 2..0, and mod contains the mod field in bits 2..0
// (mod is a two-bit field, so after the second shift its value is 0..3.)
367 const ModrmEntry
* modrm_entry
= 0;
// The lookup table is chosen by the effective address size.
368 if (address_is_32_bits_
)
369 modrm_entry
= &s_ia32_modrm_map_
[modrm
];
371 modrm_entry
= &s_ia16_modrm_map_
[modrm
];
373 // Invariant: modrm_entry points to information that we need to decode
376 // Add to the count of operand bytes, if the ModR/M byte indicates
377 // that some operands are encoded in the instruction.
378 if (modrm_entry
->is_encoded_in_instruction_
)
379 operand_bytes_
+= modrm_entry
->operand_size_
;
381 // Process the SIB byte if necessary, and return the count
382 // of ModR/M and SIB bytes.
383 if (modrm_entry
->use_sib_byte_
) {
// The SIB byte immediately follows the ModR/M byte.
385 return ProcessSib(start_byte
+ 1, mod
, size
);
// Processes the SIB byte at start_byte.
//
// Only the base field (bits 2..0) is examined. When it equals 0x05 the
// instruction carries a displacement whose size depends on the ModR/M mod
// value, per the case labels below: mod 00 and mod 10 are grouped under an
// OS_DOUBLE_WORD increment of operand_bytes_, mod 01 precedes an OS_BYTE
// increment, and mod 11 has no increment.
//
// start_byte: address of the SIB byte.
// size:       in/out counter.
//
// NOTE(review): the parameter carrying mod (ProcessModrm() passes one), the
// switch header, the break statements, any update of size and the return
// statement are not visible in this text -- confirm against the original
// file.
392 bool MiniDisassembler::ProcessSib(unsigned char* start_byte
,
394 unsigned int& size
) {
395 // get the base field from bits 2..0 of the SIB byte
396 unsigned char sib_base
= (*start_byte
) & 0x07;
397 if (0x05 == sib_base
) {
399 case 0x00: // mod == 00
400 case 0x02: // mod == 10
401 operand_bytes_
+= OS_DOUBLE_WORD
;
403 case 0x01: // mod == 01
404 operand_bytes_
+= OS_BYTE
;
406 case 0x03: // mod == 11
407 // According to the IA-32 docs, there does not seem to be a disp
408 // value for this value of mod
418 }; // namespace sidestep