2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_x86_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2017 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
36 /* Translates x86 code to IR. */
40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
41 to ensure a 32-bit value is being written.
43 FUCOMI(P): what happens to A and S flags? Currently are forced
48 * all arithmetic done at 64 bits
50 * no FP exceptions, except for handling stack over/underflow
52 * FP rounding mode observed only for float->int conversions
53 and int->float conversions which could lose accuracy, and
54 for float-to-float rounding. For all other operations,
55 round-to-nearest is used, regardless.
57 * some of the FCOM cases could do with testing -- not convinced
58 that the args are the right way round.
60 * FSAVE does not re-initialise the FPU; it should do
62 * FINIT not only initialises the FPU environment, it also
63 zeroes all the FP registers. It should leave the registers
66 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
67 per Intel docs this bit has no meaning anyway. Since PUSHF is the
68 only way to observe eflags[1], a proper fix would be to make that
71 The state of %eflags.AC (alignment check, bit 18) is recorded by
72 the simulation (viz, if you set it with popf then a pushf produces
73 the value you set it to), but it is otherwise ignored. In
74 particular, setting it to 1 does NOT cause alignment checking to
75 happen. Programs that set it to 1 and then rely on the resulting
76 SIGBUSs to inform them of misaligned accesses will not work.
78 Implementation of sysenter is necessarily partial. sysenter is a
79 kind of system call entry. When doing a sysenter, the return
80 address is not known -- that is something that is beyond Vex's
81 knowledge. So the generated IR forces a return to the scheduler,
82       which can do what it likes to simulate the sysenter, but it MUST
83 set this thread's guest_EIP field with the continuation address
84 before resuming execution. If that doesn't happen, the thread will
85 jump to address zero, which is probably fatal.
87 This module uses global variables and so is not MT-safe (if that
88 should ever become relevant).
90 The delta values are 32-bit ints, not 64-bit ints. That means
91 this module may not work right if run on a 64-bit host. That should
92 be fixed properly, really -- if anyone ever wants to use Vex to
93 translate x86 code for execution on a 64-bit host.
95 casLE (implementation of lock-prefixed insns) and rep-prefixed
96 insns: the side-exit back to the start of the insn is done with
97 Ijk_Boring. This is quite wrong, it should be done with
98 Ijk_NoRedir, since otherwise the side exit, which is intended to
99 restart the instruction for whatever reason, could go somewhere
100 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
101 no-redir jumps performance critical, at least for rep-prefixed
102 instructions, since all iterations thereof would involve such a
103 jump. It's not such a big deal with casLE since the side exit is
104 only taken if the CAS fails, that is, the location is contended,
105 which is relatively unlikely.
107 XXXX: Nov 2009: handling of SWP on ARM suffers from the same
110 Note also, the test for CAS success vs failure is done using
111 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
112 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
113 shouldn't definedness-check these comparisons. See
114 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
115 background/rationale.
118 /* Performance holes:
120 - fcom ; fstsw %ax ; sahf
121 sahf does not update the O flag (sigh) and so O needs to
122 be computed. This is done expensively; it would be better
123 to have a calculate_eflags_o helper.
125 - emwarns; some FP codes can generate huge numbers of these
126 if the fpucw is changed in an inner loop. It would be
127 better for the guest state to have an emwarn-enable reg
128 which can be set zero or nonzero. If it is zero, emwarns
129 are not flagged, and instead control just flows all the
130 way through bbs as usual.
133 /* "Special" instructions.
135 This instruction decoder can decode three special instructions
136 which mean nothing natively (are no-ops as far as regs/mem are
137 concerned) but have meaning for supporting Valgrind. A special
138 instruction is flagged by the 12-byte preamble C1C703 C1C70D C1C71D
139 C1C713 (in the standard interpretation, that means: roll $3, %edi;
140 roll $13, %edi; roll $29, %edi; roll $19, %edi). Following that,
141 one of the following 3 are allowed (standard interpretation in
144 87DB (xchgl %ebx,%ebx) %EDX = client_request ( %EAX )
145 87C9 (xchgl %ecx,%ecx) %EAX = guest_NRADDR
146 87D2 (xchgl %edx,%edx) call-noredir *%EAX
147 87FF (xchgl %edi,%edi) IR injection
149 Any other bytes following the 12-byte preamble are illegal and
150 constitute a failure in instruction decoding. This all assumes
151 that the preamble will never occur except in specific code
152 fragments designed for Valgrind to catch.
154 No prefixes may precede a "Special" instruction.
157 /* LOCK prefixed instructions. These are translated using IR-level
158 CAS statements (IRCAS) and are believed to preserve atomicity, even
159 from the point of view of some other process racing against a
160 simulated one (presumably they communicate via a shared memory
163 Handlers which are aware of LOCK prefixes are:
164 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
165 dis_cmpxchg_G_E (cmpxchg)
166 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
170 dis_Grp8_Imm (bts, btc, btr)
171 dis_bt_G_E (bts, btc, btr)
176 #include "libvex_basictypes.h"
177 #include "libvex_ir.h"
179 #include "libvex_guest_x86.h"
181 #include "main_util.h"
182 #include "main_globals.h"
183 #include "guest_generic_bb_to_IR.h"
184 #include "guest_generic_x87.h"
185 #include "guest_x86_defs.h"
188 /*------------------------------------------------------------*/
190 /*------------------------------------------------------------*/
/* These are set at the start of the translation of an insn, right
   down in disInstr_X86, so that we don't have to pass them around
   endlessly.  They are all constant during the translation of any
   given insn. */

/* Endianness of the host.  We need to know this to do sub-register
   accesses correctly. */
static VexEndness host_endness;

/* Pointer to the guest code area (points to start of BB, not to the
   insn being processed). */
static const UChar* guest_code;

/* The guest address corresponding to guest_code[0]. */
static Addr32 guest_EIP_bbstart;

/* The guest address for the instruction currently being
   translated. */
static Addr32 guest_EIP_curr_instr;
211 /* The IRSB* into which we're generating code. */
215 /*------------------------------------------------------------*/
216 /*--- Debugging output ---*/
217 /*------------------------------------------------------------*/
/* Emit a disassembly-trace line (DIP) or format one into a caller
   buffer (DIS), but only when front-end tracing (VEX_TRACE_FE) is
   enabled.  Deliberately not wrapped in do/while(0): matches the
   existing usage style in this file. */
#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)
228 /*------------------------------------------------------------*/
229 /*--- Offsets of various parts of the x86 guest state. ---*/
230 /*------------------------------------------------------------*/
232 #define OFFB_EAX offsetof(VexGuestX86State,guest_EAX)
233 #define OFFB_EBX offsetof(VexGuestX86State,guest_EBX)
234 #define OFFB_ECX offsetof(VexGuestX86State,guest_ECX)
235 #define OFFB_EDX offsetof(VexGuestX86State,guest_EDX)
236 #define OFFB_ESP offsetof(VexGuestX86State,guest_ESP)
237 #define OFFB_EBP offsetof(VexGuestX86State,guest_EBP)
238 #define OFFB_ESI offsetof(VexGuestX86State,guest_ESI)
239 #define OFFB_EDI offsetof(VexGuestX86State,guest_EDI)
241 #define OFFB_EIP offsetof(VexGuestX86State,guest_EIP)
243 #define OFFB_CC_OP offsetof(VexGuestX86State,guest_CC_OP)
244 #define OFFB_CC_DEP1 offsetof(VexGuestX86State,guest_CC_DEP1)
245 #define OFFB_CC_DEP2 offsetof(VexGuestX86State,guest_CC_DEP2)
246 #define OFFB_CC_NDEP offsetof(VexGuestX86State,guest_CC_NDEP)
248 #define OFFB_FPREGS offsetof(VexGuestX86State,guest_FPREG[0])
249 #define OFFB_FPTAGS offsetof(VexGuestX86State,guest_FPTAG[0])
250 #define OFFB_DFLAG offsetof(VexGuestX86State,guest_DFLAG)
251 #define OFFB_IDFLAG offsetof(VexGuestX86State,guest_IDFLAG)
252 #define OFFB_ACFLAG offsetof(VexGuestX86State,guest_ACFLAG)
253 #define OFFB_FTOP offsetof(VexGuestX86State,guest_FTOP)
254 #define OFFB_FC3210 offsetof(VexGuestX86State,guest_FC3210)
255 #define OFFB_FPROUND offsetof(VexGuestX86State,guest_FPROUND)
257 #define OFFB_CS offsetof(VexGuestX86State,guest_CS)
258 #define OFFB_DS offsetof(VexGuestX86State,guest_DS)
259 #define OFFB_ES offsetof(VexGuestX86State,guest_ES)
260 #define OFFB_FS offsetof(VexGuestX86State,guest_FS)
261 #define OFFB_GS offsetof(VexGuestX86State,guest_GS)
262 #define OFFB_SS offsetof(VexGuestX86State,guest_SS)
263 #define OFFB_LDT offsetof(VexGuestX86State,guest_LDT)
264 #define OFFB_GDT offsetof(VexGuestX86State,guest_GDT)
266 #define OFFB_SSEROUND offsetof(VexGuestX86State,guest_SSEROUND)
267 #define OFFB_XMM0 offsetof(VexGuestX86State,guest_XMM0)
268 #define OFFB_XMM1 offsetof(VexGuestX86State,guest_XMM1)
269 #define OFFB_XMM2 offsetof(VexGuestX86State,guest_XMM2)
270 #define OFFB_XMM3 offsetof(VexGuestX86State,guest_XMM3)
271 #define OFFB_XMM4 offsetof(VexGuestX86State,guest_XMM4)
272 #define OFFB_XMM5 offsetof(VexGuestX86State,guest_XMM5)
273 #define OFFB_XMM6 offsetof(VexGuestX86State,guest_XMM6)
274 #define OFFB_XMM7 offsetof(VexGuestX86State,guest_XMM7)
276 #define OFFB_EMNOTE offsetof(VexGuestX86State,guest_EMNOTE)
278 #define OFFB_CMSTART offsetof(VexGuestX86State,guest_CMSTART)
279 #define OFFB_CMLEN offsetof(VexGuestX86State,guest_CMLEN)
280 #define OFFB_NRADDR offsetof(VexGuestX86State,guest_NRADDR)
282 #define OFFB_IP_AT_SYSCALL offsetof(VexGuestX86State,guest_IP_AT_SYSCALL)
285 /*------------------------------------------------------------*/
286 /*--- Helper bits and pieces for deconstructing the ---*/
287 /*--- x86 insn stream. ---*/
288 /*------------------------------------------------------------*/
290 /* This is the Intel register encoding -- integer regs. */
300 #define R_AL (0+R_EAX)
301 #define R_AH (4+R_EAX)
303 /* This is the Intel register encoding -- segment regs. */
312 /* Add a statement to the list held by "irbb". */
313 static void stmt ( IRStmt
* st
)
315 addStmtToIRSB( irsb
, st
);
318 /* Generate a new temporary of the given type. */
319 static IRTemp
newTemp ( IRType ty
)
321 vassert(isPlausibleIRType(ty
));
322 return newIRTemp( irsb
->tyenv
, ty
);
325 /* Various simple conversions */
327 static UInt
extend_s_8to32( UInt x
)
329 return (UInt
)((Int
)(x
<< 24) >> 24);
332 static UInt
extend_s_16to32 ( UInt x
)
334 return (UInt
)((Int
)(x
<< 16) >> 16);
337 /* Fetch a byte from the guest insn stream. */
338 static UChar
getIByte ( Int delta
)
340 return guest_code
[delta
];
343 /* Extract the reg field from a modRM byte. */
344 static Int
gregOfRM ( UChar mod_reg_rm
)
346 return (Int
)( (mod_reg_rm
>> 3) & 7 );
349 /* Figure out whether the mod and rm parts of a modRM byte refer to a
350 register or memory. If so, the byte will have the form 11XXXYYY,
351 where YYY is the register number. */
352 static Bool
epartIsReg ( UChar mod_reg_rm
)
354 return toBool(0xC0 == (mod_reg_rm
& 0xC0));
357 /* ... and extract the register number ... */
358 static Int
eregOfRM ( UChar mod_reg_rm
)
360 return (Int
)(mod_reg_rm
& 0x7);
363 /* Get a 8/16/32-bit unsigned value out of the insn stream. */
365 static UChar
getUChar ( Int delta
)
367 UChar v
= guest_code
[delta
+0];
371 static UInt
getUDisp16 ( Int delta
)
373 UInt v
= guest_code
[delta
+1]; v
<<= 8;
374 v
|= guest_code
[delta
+0];
378 static UInt
getUDisp32 ( Int delta
)
380 UInt v
= guest_code
[delta
+3]; v
<<= 8;
381 v
|= guest_code
[delta
+2]; v
<<= 8;
382 v
|= guest_code
[delta
+1]; v
<<= 8;
383 v
|= guest_code
[delta
+0];
387 static UInt
getUDisp ( Int size
, Int delta
)
390 case 4: return getUDisp32(delta
);
391 case 2: return getUDisp16(delta
);
392 case 1: return (UInt
)getUChar(delta
);
393 default: vpanic("getUDisp(x86)");
395 return 0; /*notreached*/
399 /* Get a byte value out of the insn stream and sign-extend to 32
401 static UInt
getSDisp8 ( Int delta
)
403 return extend_s_8to32( (UInt
) (guest_code
[delta
]) );
406 static UInt
getSDisp16 ( Int delta0
)
408 const UChar
* eip
= &guest_code
[delta0
];
410 d
|= ((*eip
++) << 8);
411 return extend_s_16to32(d
);
414 static UInt
getSDisp ( Int size
, Int delta
)
417 case 4: return getUDisp32(delta
);
418 case 2: return getSDisp16(delta
);
419 case 1: return getSDisp8(delta
);
420 default: vpanic("getSDisp(x86)");
422 return 0; /*notreached*/
426 /*------------------------------------------------------------*/
427 /*--- Helpers for constructing IR. ---*/
428 /*------------------------------------------------------------*/
430 /* Create a 1/2/4 byte read of an x86 integer registers. For 16/8 bit
431 register references, we need to take the host endianness into
432 account. Supplied value is 0 .. 7 and in the Intel instruction
435 static IRType
szToITy ( Int n
)
438 case 1: return Ity_I8
;
439 case 2: return Ity_I16
;
440 case 4: return Ity_I32
;
441 default: vpanic("szToITy(x86)");
445 /* On a little-endian host, less significant bits of the guest
446 registers are at lower addresses. Therefore, if a reference to a
447 register low half has the safe guest state offset as a reference to
450 static Int
integerGuestRegOffset ( Int sz
, UInt archreg
)
452 vassert(archreg
< 8);
454 /* Correct for little-endian host only. */
455 vassert(host_endness
== VexEndnessLE
);
457 if (sz
== 4 || sz
== 2 || (sz
== 1 && archreg
< 4)) {
459 case R_EAX
: return OFFB_EAX
;
460 case R_EBX
: return OFFB_EBX
;
461 case R_ECX
: return OFFB_ECX
;
462 case R_EDX
: return OFFB_EDX
;
463 case R_ESI
: return OFFB_ESI
;
464 case R_EDI
: return OFFB_EDI
;
465 case R_ESP
: return OFFB_ESP
;
466 case R_EBP
: return OFFB_EBP
;
467 default: vpanic("integerGuestRegOffset(x86,le)(4,2)");
471 vassert(archreg
>= 4 && archreg
< 8 && sz
== 1);
473 case R_EAX
: return 1+ OFFB_EAX
;
474 case R_EBX
: return 1+ OFFB_EBX
;
475 case R_ECX
: return 1+ OFFB_ECX
;
476 case R_EDX
: return 1+ OFFB_EDX
;
477 default: vpanic("integerGuestRegOffset(x86,le)(1h)");
481 vpanic("integerGuestRegOffset(x86,le)");
484 static Int
segmentGuestRegOffset ( UInt sreg
)
487 case R_ES
: return OFFB_ES
;
488 case R_CS
: return OFFB_CS
;
489 case R_SS
: return OFFB_SS
;
490 case R_DS
: return OFFB_DS
;
491 case R_FS
: return OFFB_FS
;
492 case R_GS
: return OFFB_GS
;
493 default: vpanic("segmentGuestRegOffset(x86)");
497 static Int
xmmGuestRegOffset ( UInt xmmreg
)
500 case 0: return OFFB_XMM0
;
501 case 1: return OFFB_XMM1
;
502 case 2: return OFFB_XMM2
;
503 case 3: return OFFB_XMM3
;
504 case 4: return OFFB_XMM4
;
505 case 5: return OFFB_XMM5
;
506 case 6: return OFFB_XMM6
;
507 case 7: return OFFB_XMM7
;
508 default: vpanic("xmmGuestRegOffset");
512 /* Lanes of vector registers are always numbered from zero being the
513 least significant lane (rightmost in the register). */
515 static Int
xmmGuestRegLane16offset ( UInt xmmreg
, Int laneno
)
517 /* Correct for little-endian host only. */
518 vassert(host_endness
== VexEndnessLE
);
519 vassert(laneno
>= 0 && laneno
< 8);
520 return xmmGuestRegOffset( xmmreg
) + 2 * laneno
;
523 static Int
xmmGuestRegLane32offset ( UInt xmmreg
, Int laneno
)
525 /* Correct for little-endian host only. */
526 vassert(host_endness
== VexEndnessLE
);
527 vassert(laneno
>= 0 && laneno
< 4);
528 return xmmGuestRegOffset( xmmreg
) + 4 * laneno
;
531 static Int
xmmGuestRegLane64offset ( UInt xmmreg
, Int laneno
)
533 /* Correct for little-endian host only. */
534 vassert(host_endness
== VexEndnessLE
);
535 vassert(laneno
>= 0 && laneno
< 2);
536 return xmmGuestRegOffset( xmmreg
) + 8 * laneno
;
539 static IRExpr
* getIReg ( Int sz
, UInt archreg
)
541 vassert(sz
== 1 || sz
== 2 || sz
== 4);
542 vassert(archreg
< 8);
543 return IRExpr_Get( integerGuestRegOffset(sz
,archreg
),
547 /* Ditto, but write to a reg instead. */
548 static void putIReg ( Int sz
, UInt archreg
, IRExpr
* e
)
550 IRType ty
= typeOfIRExpr(irsb
->tyenv
, e
);
552 case 1: vassert(ty
== Ity_I8
); break;
553 case 2: vassert(ty
== Ity_I16
); break;
554 case 4: vassert(ty
== Ity_I32
); break;
555 default: vpanic("putIReg(x86)");
557 vassert(archreg
< 8);
558 stmt( IRStmt_Put(integerGuestRegOffset(sz
,archreg
), e
) );
561 static IRExpr
* getSReg ( UInt sreg
)
563 return IRExpr_Get( segmentGuestRegOffset(sreg
), Ity_I16
);
566 static void putSReg ( UInt sreg
, IRExpr
* e
)
568 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I16
);
569 stmt( IRStmt_Put( segmentGuestRegOffset(sreg
), e
) );
572 static IRExpr
* getXMMReg ( UInt xmmreg
)
574 return IRExpr_Get( xmmGuestRegOffset(xmmreg
), Ity_V128
);
577 static IRExpr
* getXMMRegLane64 ( UInt xmmreg
, Int laneno
)
579 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg
,laneno
), Ity_I64
);
582 static IRExpr
* getXMMRegLane64F ( UInt xmmreg
, Int laneno
)
584 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg
,laneno
), Ity_F64
);
587 static IRExpr
* getXMMRegLane32 ( UInt xmmreg
, Int laneno
)
589 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg
,laneno
), Ity_I32
);
592 static IRExpr
* getXMMRegLane32F ( UInt xmmreg
, Int laneno
)
594 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg
,laneno
), Ity_F32
);
597 static void putXMMReg ( UInt xmmreg
, IRExpr
* e
)
599 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_V128
);
600 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg
), e
) );
603 static void putXMMRegLane64 ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
605 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I64
);
606 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg
,laneno
), e
) );
609 static void putXMMRegLane64F ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
611 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_F64
);
612 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg
,laneno
), e
) );
615 static void putXMMRegLane32F ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
617 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_F32
);
618 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg
,laneno
), e
) );
621 static void putXMMRegLane32 ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
623 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I32
);
624 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg
,laneno
), e
) );
627 static void putXMMRegLane16 ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
629 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I16
);
630 stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg
,laneno
), e
) );
633 static void assign ( IRTemp dst
, IRExpr
* e
)
635 stmt( IRStmt_WrTmp(dst
, e
) );
638 static void storeLE ( IRExpr
* addr
, IRExpr
* data
)
640 stmt( IRStmt_Store(Iend_LE
, addr
, data
) );
643 static IRExpr
* unop ( IROp op
, IRExpr
* a
)
645 return IRExpr_Unop(op
, a
);
648 static IRExpr
* binop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
)
650 return IRExpr_Binop(op
, a1
, a2
);
653 static IRExpr
* triop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
, IRExpr
* a3
)
655 return IRExpr_Triop(op
, a1
, a2
, a3
);
658 static IRExpr
* mkexpr ( IRTemp tmp
)
660 return IRExpr_RdTmp(tmp
);
663 static IRExpr
* mkU8 ( UInt i
)
666 return IRExpr_Const(IRConst_U8( (UChar
)i
));
669 static IRExpr
* mkU16 ( UInt i
)
672 return IRExpr_Const(IRConst_U16( (UShort
)i
));
675 static IRExpr
* mkU32 ( UInt i
)
677 return IRExpr_Const(IRConst_U32(i
));
680 static IRExpr
* mkU64 ( ULong i
)
682 return IRExpr_Const(IRConst_U64(i
));
685 static IRExpr
* mkU ( IRType ty
, UInt i
)
687 if (ty
== Ity_I8
) return mkU8(i
);
688 if (ty
== Ity_I16
) return mkU16(i
);
689 if (ty
== Ity_I32
) return mkU32(i
);
690 /* If this panics, it usually means you passed a size (1,2,4)
691 value as the IRType, rather than a real IRType. */
695 static IRExpr
* mkV128 ( UShort mask
)
697 return IRExpr_Const(IRConst_V128(mask
));
700 static IRExpr
* loadLE ( IRType ty
, IRExpr
* addr
)
702 return IRExpr_Load(Iend_LE
, ty
, addr
);
705 static IROp
mkSizedOp ( IRType ty
, IROp op8
)
708 vassert(ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
);
709 vassert(op8
== Iop_Add8
|| op8
== Iop_Sub8
711 || op8
== Iop_Or8
|| op8
== Iop_And8
|| op8
== Iop_Xor8
712 || op8
== Iop_Shl8
|| op8
== Iop_Shr8
|| op8
== Iop_Sar8
713 || op8
== Iop_CmpEQ8
|| op8
== Iop_CmpNE8
714 || op8
== Iop_CasCmpNE8
715 || op8
== Iop_ExpCmpNE8
717 adj
= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1 : 2);
721 static IROp
mkWidenOp ( Int szSmall
, Int szBig
, Bool signd
)
723 if (szSmall
== 1 && szBig
== 4) {
724 return signd
? Iop_8Sto32
: Iop_8Uto32
;
726 if (szSmall
== 1 && szBig
== 2) {
727 return signd
? Iop_8Sto16
: Iop_8Uto16
;
729 if (szSmall
== 2 && szBig
== 4) {
730 return signd
? Iop_16Sto32
: Iop_16Uto32
;
732 vpanic("mkWidenOp(x86,guest)");
735 static IRExpr
* mkAnd1 ( IRExpr
* x
, IRExpr
* y
)
737 vassert(typeOfIRExpr(irsb
->tyenv
,x
) == Ity_I1
);
738 vassert(typeOfIRExpr(irsb
->tyenv
,y
) == Ity_I1
);
739 return unop(Iop_32to1
,
742 unop(Iop_1Uto32
,y
)));
745 /* Generate a compare-and-swap operation, operating on memory at
746 'addr'. The expected value is 'expVal' and the new value is
747 'newVal'. If the operation fails, then transfer control (with a
748 no-redir jump (XXX no -- see comment at top of this file)) to
749 'restart_point', which is presumably the address of the guest
750 instruction again -- retrying, essentially. */
751 static void casLE ( IRExpr
* addr
, IRExpr
* expVal
, IRExpr
* newVal
,
752 Addr32 restart_point
)
755 IRType tyE
= typeOfIRExpr(irsb
->tyenv
, expVal
);
756 IRType tyN
= typeOfIRExpr(irsb
->tyenv
, newVal
);
757 IRTemp oldTmp
= newTemp(tyE
);
758 IRTemp expTmp
= newTemp(tyE
);
760 vassert(tyE
== Ity_I32
|| tyE
== Ity_I16
|| tyE
== Ity_I8
);
761 assign(expTmp
, expVal
);
762 cas
= mkIRCAS( IRTemp_INVALID
, oldTmp
, Iend_LE
, addr
,
763 NULL
, mkexpr(expTmp
), NULL
, newVal
);
764 stmt( IRStmt_CAS(cas
) );
766 binop( mkSizedOp(tyE
,Iop_CasCmpNE8
),
767 mkexpr(oldTmp
), mkexpr(expTmp
) ),
768 Ijk_Boring
, /*Ijk_NoRedir*/
769 IRConst_U32( restart_point
),
775 /*------------------------------------------------------------*/
776 /*--- Helpers for %eflags. ---*/
777 /*------------------------------------------------------------*/
779 /* -------------- Evaluating the flags-thunk. -------------- */
781 /* Build IR to calculate all the eflags from stored
782 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
784 static IRExpr
* mk_x86g_calculate_eflags_all ( void )
787 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP
, Ity_I32
),
788 IRExpr_Get(OFFB_CC_DEP1
, Ity_I32
),
789 IRExpr_Get(OFFB_CC_DEP2
, Ity_I32
),
790 IRExpr_Get(OFFB_CC_NDEP
, Ity_I32
) );
795 "x86g_calculate_eflags_all", &x86g_calculate_eflags_all
,
798 /* Exclude OP and NDEP from definedness checking. We're only
799 interested in DEP1 and DEP2. */
800 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<3);
804 /* Build IR to calculate some particular condition from stored
805 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
807 static IRExpr
* mk_x86g_calculate_condition ( X86Condcode cond
)
810 = mkIRExprVec_5( mkU32(cond
),
811 IRExpr_Get(OFFB_CC_OP
, Ity_I32
),
812 IRExpr_Get(OFFB_CC_DEP1
, Ity_I32
),
813 IRExpr_Get(OFFB_CC_DEP2
, Ity_I32
),
814 IRExpr_Get(OFFB_CC_NDEP
, Ity_I32
) );
819 "x86g_calculate_condition", &x86g_calculate_condition
,
822 /* Exclude the requested condition, OP and NDEP from definedness
823 checking. We're only interested in DEP1 and DEP2. */
824 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<1) | (1<<4);
825 return unop(Iop_32to1
, call
);
828 /* Build IR to calculate just the carry flag from stored
829 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I32. */
830 static IRExpr
* mk_x86g_calculate_eflags_c ( void )
833 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP
, Ity_I32
),
834 IRExpr_Get(OFFB_CC_DEP1
, Ity_I32
),
835 IRExpr_Get(OFFB_CC_DEP2
, Ity_I32
),
836 IRExpr_Get(OFFB_CC_NDEP
, Ity_I32
) );
841 "x86g_calculate_eflags_c", &x86g_calculate_eflags_c
,
844 /* Exclude OP and NDEP from definedness checking. We're only
845 interested in DEP1 and DEP2. */
846 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<3);
851 /* -------------- Building the flags-thunk. -------------- */
853 /* The machinery in this section builds the flag-thunk following a
854 flag-setting operation. Hence the various setFlags_* functions.
857 static Bool
isAddSub ( IROp op8
)
859 return toBool(op8
== Iop_Add8
|| op8
== Iop_Sub8
);
862 static Bool
isLogic ( IROp op8
)
864 return toBool(op8
== Iop_And8
|| op8
== Iop_Or8
|| op8
== Iop_Xor8
);
867 /* U-widen 8/16/32 bit int expr to 32. */
868 static IRExpr
* widenUto32 ( IRExpr
* e
)
870 switch (typeOfIRExpr(irsb
->tyenv
,e
)) {
871 case Ity_I32
: return e
;
872 case Ity_I16
: return unop(Iop_16Uto32
,e
);
873 case Ity_I8
: return unop(Iop_8Uto32
,e
);
874 default: vpanic("widenUto32");
878 /* S-widen 8/16/32 bit int expr to 32. */
879 static IRExpr
* widenSto32 ( IRExpr
* e
)
881 switch (typeOfIRExpr(irsb
->tyenv
,e
)) {
882 case Ity_I32
: return e
;
883 case Ity_I16
: return unop(Iop_16Sto32
,e
);
884 case Ity_I8
: return unop(Iop_8Sto32
,e
);
885 default: vpanic("widenSto32");
889 /* Narrow 8/16/32 bit int expr to 8/16/32. Clearly only some
890 of these combinations make sense. */
891 static IRExpr
* narrowTo ( IRType dst_ty
, IRExpr
* e
)
893 IRType src_ty
= typeOfIRExpr(irsb
->tyenv
,e
);
894 if (src_ty
== dst_ty
)
896 if (src_ty
== Ity_I32
&& dst_ty
== Ity_I16
)
897 return unop(Iop_32to16
, e
);
898 if (src_ty
== Ity_I32
&& dst_ty
== Ity_I8
)
899 return unop(Iop_32to8
, e
);
901 vex_printf("\nsrc, dst tys are: ");
906 vpanic("narrowTo(x86)");
910 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
911 auto-sized up to the real op. */
914 void setFlags_DEP1_DEP2 ( IROp op8
, IRTemp dep1
, IRTemp dep2
, IRType ty
)
916 Int ccOp
= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1 : 2);
918 vassert(ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
);
921 case Iop_Add8
: ccOp
+= X86G_CC_OP_ADDB
; break;
922 case Iop_Sub8
: ccOp
+= X86G_CC_OP_SUBB
; break;
923 default: ppIROp(op8
);
924 vpanic("setFlags_DEP1_DEP2(x86)");
926 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(ccOp
)) );
927 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(dep1
))) );
928 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto32(mkexpr(dep2
))) );
929 /* Set NDEP even though it isn't used. This makes redundant-PUT
930 elimination of previous stores to this field work better. */
931 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
935 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
938 void setFlags_DEP1 ( IROp op8
, IRTemp dep1
, IRType ty
)
940 Int ccOp
= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1 : 2);
942 vassert(ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
);
947 case Iop_Xor8
: ccOp
+= X86G_CC_OP_LOGICB
; break;
948 default: ppIROp(op8
);
949 vpanic("setFlags_DEP1(x86)");
951 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(ccOp
)) );
952 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(dep1
))) );
953 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0)) );
954 /* Set NDEP even though it isn't used. This makes redundant-PUT
955 elimination of previous stores to this field work better. */
956 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
960 /* For shift operations, we put in the result and the undershifted
961 result. Except if the shift amount is zero, the thunk is left
964 static void setFlags_DEP1_DEP2_shift ( IROp op32
,
970 Int ccOp
= ty
==Ity_I8
? 2 : (ty
==Ity_I16
? 1 : 0);
972 vassert(ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
);
975 /* Both kinds of right shifts are handled by the same thunk
979 case Iop_Sar32
: ccOp
= X86G_CC_OP_SHRL
- ccOp
; break;
980 case Iop_Shl32
: ccOp
= X86G_CC_OP_SHLL
- ccOp
; break;
981 default: ppIROp(op32
);
982 vpanic("setFlags_DEP1_DEP2_shift(x86)");
985 /* guard :: Ity_I8. We need to convert it to I1. */
986 IRTemp guardB
= newTemp(Ity_I1
);
987 assign( guardB
, binop(Iop_CmpNE8
, mkexpr(guard
), mkU8(0)) );
989 /* DEP1 contains the result, DEP2 contains the undershifted value. */
990 stmt( IRStmt_Put( OFFB_CC_OP
,
991 IRExpr_ITE( mkexpr(guardB
),
993 IRExpr_Get(OFFB_CC_OP
,Ity_I32
) ) ));
994 stmt( IRStmt_Put( OFFB_CC_DEP1
,
995 IRExpr_ITE( mkexpr(guardB
),
996 widenUto32(mkexpr(res
)),
997 IRExpr_Get(OFFB_CC_DEP1
,Ity_I32
) ) ));
998 stmt( IRStmt_Put( OFFB_CC_DEP2
,
999 IRExpr_ITE( mkexpr(guardB
),
1000 widenUto32(mkexpr(resUS
)),
1001 IRExpr_Get(OFFB_CC_DEP2
,Ity_I32
) ) ));
1002 /* Set NDEP even though it isn't used. This makes redundant-PUT
1003 elimination of previous stores to this field work better. */
1004 stmt( IRStmt_Put( OFFB_CC_NDEP
,
1005 IRExpr_ITE( mkexpr(guardB
),
1007 IRExpr_Get(OFFB_CC_NDEP
,Ity_I32
) ) ));
1011 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1012 the former value of the carry flag, which unfortunately we have to
1015 static void setFlags_INC_DEC ( Bool inc
, IRTemp res
, IRType ty
)
1017 Int ccOp
= inc
? X86G_CC_OP_INCB
: X86G_CC_OP_DECB
;
1019 ccOp
+= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1 : 2);
1020 vassert(ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
);
1022 /* This has to come first, because calculating the C flag
1023 may require reading all four thunk fields. */
1024 stmt( IRStmt_Put( OFFB_CC_NDEP
, mk_x86g_calculate_eflags_c()) );
1025 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(ccOp
)) );
1026 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(res
))) );
1027 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0)) );
1031 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1035 void setFlags_MUL ( IRType ty
, IRTemp arg1
, IRTemp arg2
, UInt base_op
)
1039 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(base_op
+0) ) );
1042 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(base_op
+1) ) );
1045 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(base_op
+2) ) );
1048 vpanic("setFlags_MUL(x86)");
1050 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(arg1
)) ));
1051 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto32(mkexpr(arg2
)) ));
1052 /* Set NDEP even though it isn't used. This makes redundant-PUT
1053 elimination of previous stores to this field work better. */
1054 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
1058 /* -------------- Condition codes. -------------- */
1060 /* Condition codes, using the Intel encoding. */
1062 static const HChar
* name_X86Condcode ( X86Condcode cond
)
1065 case X86CondO
: return "o";
1066 case X86CondNO
: return "no";
1067 case X86CondB
: return "b";
1068 case X86CondNB
: return "nb";
1069 case X86CondZ
: return "z";
1070 case X86CondNZ
: return "nz";
1071 case X86CondBE
: return "be";
1072 case X86CondNBE
: return "nbe";
1073 case X86CondS
: return "s";
1074 case X86CondNS
: return "ns";
1075 case X86CondP
: return "p";
1076 case X86CondNP
: return "np";
1077 case X86CondL
: return "l";
1078 case X86CondNL
: return "nl";
1079 case X86CondLE
: return "le";
1080 case X86CondNLE
: return "nle";
1081 case X86CondAlways
: return "ALWAYS";
1082 default: vpanic("name_X86Condcode");
/* Canonicalise a condition code to its "positive" form, reporting via
   *needInvert whether the caller must invert the test result.  Only
   the real condition encodings (O .. NLE) are accepted.
   NOTE(review): the tail of this function (the invert case and return)
   appears elided by extraction -- verify against upstream. */
1087 X86Condcode
positiveIse_X86Condcode ( X86Condcode cond
,
1090 vassert(cond
>= X86CondO
&& cond
<= X86CondNLE
);
/* Even encodings are already positive: no inversion needed. */
1095 *needInvert
= False
;
1101 /* -------------- Helpers for ADD/SUB with carry. -------------- */
1103 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1106 Optionally, generate a store for the 'tres' value. This can either
1107 be a normal store, or it can be a cas-with-possible-failure style
1110 if taddr is IRTemp_INVALID, then no store is generated.
1112 if taddr is not IRTemp_INVALID, then a store (using taddr as
1113 the address) is generated:
1115 if texpVal is IRTemp_INVALID then a normal store is
1116 generated, and restart_point must be zero (it is irrelevant).
1118 if texpVal is not IRTemp_INVALID then a cas-style store is
1119 generated. texpVal is the expected value, restart_point
1120 is the restart point if the store fails, and texpVal must
1121 have the same type as tres.
/* Compute tres = ta1 + ta2 + C (add-with-carry) at size 'sz' and set
   the eflags thunk accordingly (CC_OP = ADC{B,W,L}, DEP1 = ta1,
   DEP2 = ta2 ^ oldC, NDEP = oldC).  Optionally also stores the result:
   see the contract comment preceding this function (taddr /
   texpVal / restart_point select no store, plain store, or
   cas-with-possible-failure store). */
1123 static void helper_ADC ( Int sz
,
1124 IRTemp tres
, IRTemp ta1
, IRTemp ta2
,
1125 /* info about optional store: */
1126 IRTemp taddr
, IRTemp texpVal
, Addr32 restart_point
)
1129 IRType ty
= szToITy(sz
);
1130 IRTemp oldc
= newTemp(Ity_I32
);
1131 IRTemp oldcn
= newTemp(ty
);
1132 IROp plus
= mkSizedOp(ty
, Iop_Add8
);
1133 IROp
xor = mkSizedOp(ty
, Iop_Xor8
);
1135 vassert(typeOfIRTemp(irsb
->tyenv
, tres
) == ty
);
1136 vassert(sz
== 1 || sz
== 2 || sz
== 4);
/* Select the size-specific ADC thunk op. */
1137 thunkOp
= sz
==4 ? X86G_CC_OP_ADCL
1138 : (sz
==2 ? X86G_CC_OP_ADCW
: X86G_CC_OP_ADCB
);
1140 /* oldc = old carry flag, 0 or 1 */
1141 assign( oldc
, binop(Iop_And32
,
1142 mk_x86g_calculate_eflags_c(),
/* oldcn = carry narrowed to the operand size. */
1145 assign( oldcn
, narrowTo(ty
, mkexpr(oldc
)) );
/* tres = (ta1 + ta2) + oldcn. */
1147 assign( tres
, binop(plus
,
1148 binop(plus
,mkexpr(ta1
),mkexpr(ta2
)),
1151 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1152 start of this function. */
1153 if (taddr
!= IRTemp_INVALID
) {
1154 if (texpVal
== IRTemp_INVALID
) {
1155 vassert(restart_point
== 0);
1156 storeLE( mkexpr(taddr
), mkexpr(tres
) );
1158 vassert(typeOfIRTemp(irsb
->tyenv
, texpVal
) == ty
);
1159 /* .. and hence 'texpVal' has the same type as 'tres'. */
1160 casLE( mkexpr(taddr
),
1161 mkexpr(texpVal
), mkexpr(tres
), restart_point
);
/* Finally, write the flags thunk. */
1165 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(thunkOp
) ) );
1166 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(ta1
)) ));
1167 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto32(binop(xor, mkexpr(ta2
),
1168 mkexpr(oldcn
)) )) );
1169 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkexpr(oldc
) ) );
1173 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
1174 appropriately. As with helper_ADC, possibly generate a store of
1175 the result -- see comments on helper_ADC for details.
/* Compute tres = ta1 - ta2 - C (subtract-with-borrow) at size 'sz'
   and set the eflags thunk (CC_OP = SBB{B,W,L}, DEP1 = ta1,
   DEP2 = ta2 ^ oldC, NDEP = oldC).  Optional-store behaviour is
   identical to helper_ADC's contract. */
1177 static void helper_SBB ( Int sz
,
1178 IRTemp tres
, IRTemp ta1
, IRTemp ta2
,
1179 /* info about optional store: */
1180 IRTemp taddr
, IRTemp texpVal
, Addr32 restart_point
)
1183 IRType ty
= szToITy(sz
);
1184 IRTemp oldc
= newTemp(Ity_I32
);
1185 IRTemp oldcn
= newTemp(ty
);
1186 IROp minus
= mkSizedOp(ty
, Iop_Sub8
);
1187 IROp
xor = mkSizedOp(ty
, Iop_Xor8
);
1189 vassert(typeOfIRTemp(irsb
->tyenv
, tres
) == ty
);
1190 vassert(sz
== 1 || sz
== 2 || sz
== 4);
/* Select the size-specific SBB thunk op. */
1191 thunkOp
= sz
==4 ? X86G_CC_OP_SBBL
1192 : (sz
==2 ? X86G_CC_OP_SBBW
: X86G_CC_OP_SBBB
);
1194 /* oldc = old carry flag, 0 or 1 */
1195 assign( oldc
, binop(Iop_And32
,
1196 mk_x86g_calculate_eflags_c(),
/* oldcn = borrow narrowed to the operand size. */
1199 assign( oldcn
, narrowTo(ty
, mkexpr(oldc
)) );
/* tres = (ta1 - ta2) - oldcn. */
1201 assign( tres
, binop(minus
,
1202 binop(minus
,mkexpr(ta1
),mkexpr(ta2
)),
1205 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1206 start of this function. */
1207 if (taddr
!= IRTemp_INVALID
) {
1208 if (texpVal
== IRTemp_INVALID
) {
1209 vassert(restart_point
== 0);
1210 storeLE( mkexpr(taddr
), mkexpr(tres
) );
1212 vassert(typeOfIRTemp(irsb
->tyenv
, texpVal
) == ty
);
1213 /* .. and hence 'texpVal' has the same type as 'tres'. */
1214 casLE( mkexpr(taddr
),
1215 mkexpr(texpVal
), mkexpr(tres
), restart_point
);
/* Finally, write the flags thunk. */
1219 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(thunkOp
) ) );
1220 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(ta1
) )) );
1221 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto32(binop(xor, mkexpr(ta2
),
1222 mkexpr(oldcn
)) )) );
1223 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkexpr(oldc
) ) );
1227 /* -------------- Helpers for disassembly printing. -------------- */
1229 static const HChar
* nameGrp1 ( Int opc_aux
)
1231 static const HChar
* grp1_names
[8]
1232 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
1233 if (opc_aux
< 0 || opc_aux
> 7) vpanic("nameGrp1(x86)");
1234 return grp1_names
[opc_aux
];
1237 static const HChar
* nameGrp2 ( Int opc_aux
)
1239 static const HChar
* grp2_names
[8]
1240 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
1241 if (opc_aux
< 0 || opc_aux
> 7) vpanic("nameGrp2(x86)");
1242 return grp2_names
[opc_aux
];
1245 static const HChar
* nameGrp4 ( Int opc_aux
)
1247 static const HChar
* grp4_names
[8]
1248 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
1249 if (opc_aux
< 0 || opc_aux
> 1) vpanic("nameGrp4(x86)");
1250 return grp4_names
[opc_aux
];
1253 static const HChar
* nameGrp5 ( Int opc_aux
)
1255 static const HChar
* grp5_names
[8]
1256 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
1257 if (opc_aux
< 0 || opc_aux
> 6) vpanic("nameGrp5(x86)");
1258 return grp5_names
[opc_aux
];
1261 static const HChar
* nameGrp8 ( Int opc_aux
)
1263 static const HChar
* grp8_names
[8]
1264 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
1265 if (opc_aux
< 4 || opc_aux
> 7) vpanic("nameGrp8(x86)");
1266 return grp8_names
[opc_aux
];
1269 static const HChar
* nameIReg ( Int size
, Int reg
)
1271 static const HChar
* ireg32_names
[8]
1272 = { "%eax", "%ecx", "%edx", "%ebx",
1273 "%esp", "%ebp", "%esi", "%edi" };
1274 static const HChar
* ireg16_names
[8]
1275 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" };
1276 static const HChar
* ireg8_names
[8]
1277 = { "%al", "%cl", "%dl", "%bl",
1278 "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" };
1279 if (reg
< 0 || reg
> 7) goto bad
;
1281 case 4: return ireg32_names
[reg
];
1282 case 2: return ireg16_names
[reg
];
1283 case 1: return ireg8_names
[reg
];
1286 vpanic("nameIReg(X86)");
1287 return NULL
; /*notreached*/
1290 static const HChar
* nameSReg ( UInt sreg
)
1293 case R_ES
: return "%es";
1294 case R_CS
: return "%cs";
1295 case R_SS
: return "%ss";
1296 case R_DS
: return "%ds";
1297 case R_FS
: return "%fs";
1298 case R_GS
: return "%gs";
1299 default: vpanic("nameSReg(x86)");
1303 static const HChar
* nameMMXReg ( Int mmxreg
)
1305 static const HChar
* mmx_names
[8]
1306 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
1307 if (mmxreg
< 0 || mmxreg
> 7) vpanic("nameMMXReg(x86,guest)");
1308 return mmx_names
[mmxreg
];
1311 static const HChar
* nameXMMReg ( Int xmmreg
)
1313 static const HChar
* xmm_names
[8]
1314 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
1315 "%xmm4", "%xmm5", "%xmm6", "%xmm7" };
1316 if (xmmreg
< 0 || xmmreg
> 7) vpanic("name_of_xmm_reg");
1317 return xmm_names
[xmmreg
];
/* Printable name of an MMX granularity selector.
   NOTE(review): the valid-case arms of this switch appear elided by
   extraction; only the default is visible.  Verify against upstream. */
1320 static const HChar
* nameMMXGran ( Int gran
)
1327 default: vpanic("nameMMXGran(x86,guest)");
/* Single-character AT&T size suffix for an operand size.
   NOTE(review): the valid-case arms of this switch appear elided by
   extraction; only the default is visible.  Verify against upstream. */
1331 static HChar
nameISize ( Int size
)
1337 default: vpanic("nameISize(x86)");
1342 /*------------------------------------------------------------*/
1343 /*--- JMP helpers ---*/
1344 /*------------------------------------------------------------*/
1346 static void jmp_lit( /*MOD*/DisResult
* dres
,
1347 IRJumpKind kind
, Addr32 d32
)
1349 vassert(dres
->whatNext
== Dis_Continue
);
1350 vassert(dres
->len
== 0);
1351 vassert(dres
->continueAt
== 0);
1352 vassert(dres
->jk_StopHere
== Ijk_INVALID
);
1353 dres
->whatNext
= Dis_StopHere
;
1354 dres
->jk_StopHere
= kind
;
1355 stmt( IRStmt_Put( OFFB_EIP
, mkU32(d32
) ) );
1358 static void jmp_treg( /*MOD*/DisResult
* dres
,
1359 IRJumpKind kind
, IRTemp t
)
1361 vassert(dres
->whatNext
== Dis_Continue
);
1362 vassert(dres
->len
== 0);
1363 vassert(dres
->continueAt
== 0);
1364 vassert(dres
->jk_StopHere
== Ijk_INVALID
);
1365 dres
->whatNext
= Dis_StopHere
;
1366 dres
->jk_StopHere
= kind
;
1367 stmt( IRStmt_Put( OFFB_EIP
, mkexpr(t
) ) );
/* Emit a conditional branch: if 'cond' holds, control goes to
   d32_true, else to d32_false.  The condition is first canonicalised
   to its positive form; depending on whether inversion was needed, the
   side-exit tests the positive condition against one target and falls
   through to the other.
   NOTE(review): the 'invert' declaration and the exit-jumpkind
   arguments appear elided by extraction -- verify against upstream. */
1371 void jcc_01( /*MOD*/DisResult
* dres
,
1372 X86Condcode cond
, Addr32 d32_false
, Addr32 d32_true
)
1375 X86Condcode condPos
;
/* 'dres' must not already have been finalised. */
1376 vassert(dres
->whatNext
== Dis_Continue
);
1377 vassert(dres
->len
== 0);
1378 vassert(dres
->continueAt
== 0);
1379 vassert(dres
->jk_StopHere
== Ijk_INVALID
);
1380 dres
->whatNext
= Dis_StopHere
;
1381 dres
->jk_StopHere
= Ijk_Boring
;
1382 condPos
= positiveIse_X86Condcode ( cond
, &invert
);
/* Inverted case: exit to the false target when the positive
   condition holds, fall through to the true target. */
1384 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos
),
1386 IRConst_U32(d32_false
),
1388 stmt( IRStmt_Put( OFFB_EIP
, mkU32(d32_true
) ) );
/* Non-inverted case: exit to the true target when the positive
   condition holds, fall through to the false target. */
1390 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos
),
1392 IRConst_U32(d32_true
),
1394 stmt( IRStmt_Put( OFFB_EIP
, mkU32(d32_false
) ) );
1399 /*------------------------------------------------------------*/
1400 /*--- Disassembling addressing modes ---*/
1401 /*------------------------------------------------------------*/
1404 const HChar
* sorbTxt ( UChar sorb
)
1407 case 0: return ""; /* no override */
1408 case 0x3E: return "%ds";
1409 case 0x26: return "%es:";
1410 case 0x64: return "%fs:";
1411 case 0x65: return "%gs:";
1412 case 0x36: return "%ss:";
1413 default: vpanic("sorbTxt(x86,guest)");
1418 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
1419 linear address by adding any required segment override as indicated
/* Convert the virtual address 'virtual' to a linear address by
   applying the segment override 'sorb' (if any): reads the selected
   segment register, the guest LDT/GDT pointers, and calls the
   x86g_use_seg_selector helper to do the descriptor lookup and limit
   check; a failure (non-zero high 32 bits of the result) causes a
   side-exit back to the current instruction.
   NOTE(review): several lines (the no-override early return, 'sreg'
   declaration, and the clean-helper call construction) appear elided
   by extraction -- verify against upstream. */
1422 IRExpr
* handleSegOverride ( UChar sorb
, IRExpr
* virtual )
1426 IRTemp ldt_ptr
, gdt_ptr
, seg_selector
, r64
;
1429 /* the common case - no override */
/* Map the prefix byte to the segment register it selects. */
1433 case 0x3E: sreg
= R_DS
; break;
1434 case 0x26: sreg
= R_ES
; break;
1435 case 0x64: sreg
= R_FS
; break;
1436 case 0x65: sreg
= R_GS
; break;
1437 case 0x36: sreg
= R_SS
; break;
1438 default: vpanic("handleSegOverride(x86,guest)");
/* Host word type, for holding the LDT/GDT base pointers. */
1441 hWordTy
= sizeof(HWord
)==4 ? Ity_I32
: Ity_I64
;
1443 seg_selector
= newTemp(Ity_I32
);
1444 ldt_ptr
= newTemp(hWordTy
);
1445 gdt_ptr
= newTemp(hWordTy
);
1446 r64
= newTemp(Ity_I64
);
1448 assign( seg_selector
, unop(Iop_16Uto32
, getSReg(sreg
)) );
1449 assign( ldt_ptr
, IRExpr_Get( OFFB_LDT
, hWordTy
));
1450 assign( gdt_ptr
, IRExpr_Get( OFFB_GDT
, hWordTy
));
1453 Call this to do the translation and limit checks:
1454 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
1455 UInt seg_selector, UInt virtual_addr )
1462 "x86g_use_seg_selector",
1463 &x86g_use_seg_selector
,
1464 mkIRExprVec_4( mkexpr(ldt_ptr
), mkexpr(gdt_ptr
),
1465 mkexpr(seg_selector
), virtual)
1469 /* If the high 32 of the result are non-zero, there was a
1470 failure in address translation. In which case, make a
1475 binop(Iop_CmpNE32
, unop(Iop_64HIto32
, mkexpr(r64
)), mkU32(0)),
1477 IRConst_U32( guest_EIP_curr_instr
),
1482 /* otherwise, here's the translated result. */
1483 return unop(Iop_64to32
, mkexpr(r64
));
1487 /* Generate IR to calculate an address indicated by a ModRM and
1488 following SIB bytes. The expression, and the number of bytes in
1489 the address mode, are returned. Note that this fn should not be
1490 called if the R/M part of the address denotes a register instead of
1491 memory. If print_codegen is true, text of the addressing mode is
1494 The computed address is stored in a new tempreg, and the
1495 identity of the tempreg is returned. */
1497 static IRTemp
disAMode_copy2tmp ( IRExpr
* addr32
)
1499 IRTemp tmp
= newTemp(Ity_I32
);
1500 assign( tmp
, addr32
);
/* Decode the ModRM (and optional SIB) addressing-mode bytes starting
   at 'delta', emit IR computing the effective (segment-adjusted)
   address into a fresh temp, write the printable form into 'buf', and
   return the temp.  '*len' receives the number of instruction bytes
   the amode occupies.  Must not be called when the R/M field denotes a
   register.
   NOTE(review): various lines (returns of *len, some case labels and
   add-expressions) appear elided by extraction -- verify against
   upstream before editing. */
1505 IRTemp
disAMode ( Int
* len
, UChar sorb
, Int delta
, HChar
* buf
)
1507 UChar mod_reg_rm
= getIByte(delta
);
1512 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
1513 jump table seems a bit excessive.
1515 mod_reg_rm
&= 0xC7; /* is now XX000YYY */
1516 mod_reg_rm
= toUChar(mod_reg_rm
| (mod_reg_rm
>> 3));
1517 /* is now XX0XXYYY */
1518 mod_reg_rm
&= 0x1F; /* is now 000XXYYY */
1519 switch (mod_reg_rm
) {
1521 /* (%eax) .. (%edi), not including (%esp) or (%ebp).
1524 case 0x00: case 0x01: case 0x02: case 0x03:
1525 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
1526 { UChar rm
= mod_reg_rm
;
1527 DIS(buf
, "%s(%s)", sorbTxt(sorb
), nameIReg(4,rm
));
1529 return disAMode_copy2tmp(
1530 handleSegOverride(sorb
, getIReg(4,rm
)));
1533 /* d8(%eax) ... d8(%edi), not including d8(%esp)
1534 --> GET %reg, t ; ADDL d8, t
1536 case 0x08: case 0x09: case 0x0A: case 0x0B:
1537 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
1538 { UChar rm
= toUChar(mod_reg_rm
& 7);
1539 UInt d
= getSDisp8(delta
);
1540 DIS(buf
, "%s%d(%s)", sorbTxt(sorb
), (Int
)d
, nameIReg(4,rm
));
1542 return disAMode_copy2tmp(
1543 handleSegOverride(sorb
,
1544 binop(Iop_Add32
,getIReg(4,rm
),mkU32(d
))));
1547 /* d32(%eax) ... d32(%edi), not including d32(%esp)
1548 --> GET %reg, t ; ADDL d8, t
1550 case 0x10: case 0x11: case 0x12: case 0x13:
1551 /* ! 14 */ case 0x15: case 0x16: case 0x17:
1552 { UChar rm
= toUChar(mod_reg_rm
& 7);
1553 UInt d
= getUDisp32(delta
);
1554 DIS(buf
, "%s0x%x(%s)", sorbTxt(sorb
), d
, nameIReg(4,rm
));
1556 return disAMode_copy2tmp(
1557 handleSegOverride(sorb
,
1558 binop(Iop_Add32
,getIReg(4,rm
),mkU32(d
))));
1561 /* a register, %eax .. %edi. This shouldn't happen. */
1562 case 0x18: case 0x19: case 0x1A: case 0x1B:
1563 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
1564 vpanic("disAMode(x86): not an addr!");
1566 /* a 32-bit literal address
1570 { UInt d
= getUDisp32(delta
);
1572 DIS(buf
, "%s(0x%x)", sorbTxt(sorb
), d
);
1573 return disAMode_copy2tmp(
1574 handleSegOverride(sorb
, mkU32(d
)));
1578 /* SIB, with no displacement. Special cases:
1579 -- %esp cannot act as an index value.
1580 If index_r indicates %esp, zero is used for the index.
1581 -- when mod is zero and base indicates EBP, base is instead
1583 It's all madness, I tell you. Extract %index, %base and
1584 scale from the SIB byte. The value denoted is then:
1585 | %index == %ESP && %base == %EBP
1586 = d32 following SIB byte
1587 | %index == %ESP && %base != %EBP
1589 | %index != %ESP && %base == %EBP
1590 = d32 following SIB byte + (%index << scale)
1591 | %index != %ESP && %base != %ESP
1592 = %base + (%index << scale)
1594 What happens to the souls of CPU architects who dream up such
1595 horrendous schemes, do you suppose?
1597 UChar sib
= getIByte(delta
);
1598 UChar scale
= toUChar((sib
>> 6) & 3);
1599 UChar index_r
= toUChar((sib
>> 3) & 7);
1600 UChar base_r
= toUChar(sib
& 7);
/* Case: base + (index << scale). */
1603 if (index_r
!= R_ESP
&& base_r
!= R_EBP
) {
1604 DIS(buf
, "%s(%s,%s,%d)", sorbTxt(sorb
),
1605 nameIReg(4,base_r
), nameIReg(4,index_r
), 1<<scale
);
1609 handleSegOverride(sorb
,
1612 binop(Iop_Shl32
, getIReg(4,index_r
),
/* Case: d32 + (index << scale), no base. */
1616 if (index_r
!= R_ESP
&& base_r
== R_EBP
) {
1617 UInt d
= getUDisp32(delta
);
1618 DIS(buf
, "%s0x%x(,%s,%d)", sorbTxt(sorb
), d
,
1619 nameIReg(4,index_r
), 1<<scale
);
1623 handleSegOverride(sorb
,
1625 binop(Iop_Shl32
, getIReg(4,index_r
), mkU8(scale
)),
/* Case: base only (index slot is %esp, meaning "no index"). */
1629 if (index_r
== R_ESP
&& base_r
!= R_EBP
) {
1630 DIS(buf
, "%s(%s,,)", sorbTxt(sorb
), nameIReg(4,base_r
));
1632 return disAMode_copy2tmp(
1633 handleSegOverride(sorb
, getIReg(4,base_r
)));
/* Case: bare d32 (no base, no index). */
1636 if (index_r
== R_ESP
&& base_r
== R_EBP
) {
1637 UInt d
= getUDisp32(delta
);
1638 DIS(buf
, "%s0x%x(,,)", sorbTxt(sorb
), d
);
1640 return disAMode_copy2tmp(
1641 handleSegOverride(sorb
, mkU32(d
)));
1647 /* SIB, with 8-bit displacement. Special cases:
1648 -- %esp cannot act as an index value.
1649 If index_r indicates %esp, zero is used for the index.
1654 = d8 + %base + (%index << scale)
1657 UChar sib
= getIByte(delta
);
1658 UChar scale
= toUChar((sib
>> 6) & 3);
1659 UChar index_r
= toUChar((sib
>> 3) & 7);
1660 UChar base_r
= toUChar(sib
& 7);
1661 UInt d
= getSDisp8(delta
+1);
/* No index: d8 + base. */
1663 if (index_r
== R_ESP
) {
1664 DIS(buf
, "%s%d(%s,,)", sorbTxt(sorb
),
1665 (Int
)d
, nameIReg(4,base_r
));
1667 return disAMode_copy2tmp(
1668 handleSegOverride(sorb
,
1669 binop(Iop_Add32
, getIReg(4,base_r
), mkU32(d
)) ));
/* General: d8 + base + (index << scale). */
1671 DIS(buf
, "%s%d(%s,%s,%d)", sorbTxt(sorb
), (Int
)d
,
1672 nameIReg(4,base_r
), nameIReg(4,index_r
), 1<<scale
);
1676 handleSegOverride(sorb
,
1681 getIReg(4,index_r
), mkU8(scale
))),
1688 /* SIB, with 32-bit displacement. Special cases:
1689 -- %esp cannot act as an index value.
1690 If index_r indicates %esp, zero is used for the index.
1695 = d32 + %base + (%index << scale)
1698 UChar sib
= getIByte(delta
);
1699 UChar scale
= toUChar((sib
>> 6) & 3);
1700 UChar index_r
= toUChar((sib
>> 3) & 7);
1701 UChar base_r
= toUChar(sib
& 7);
1702 UInt d
= getUDisp32(delta
+1);
/* No index: d32 + base. */
1704 if (index_r
== R_ESP
) {
1705 DIS(buf
, "%s%d(%s,,)", sorbTxt(sorb
),
1706 (Int
)d
, nameIReg(4,base_r
));
1708 return disAMode_copy2tmp(
1709 handleSegOverride(sorb
,
1710 binop(Iop_Add32
, getIReg(4,base_r
), mkU32(d
)) ));
/* General: d32 + base + (index << scale). */
1712 DIS(buf
, "%s%d(%s,%s,%d)", sorbTxt(sorb
), (Int
)d
,
1713 nameIReg(4,base_r
), nameIReg(4,index_r
), 1<<scale
);
1717 handleSegOverride(sorb
,
1722 getIReg(4,index_r
), mkU8(scale
))),
1730 vpanic("disAMode(x86)");
1731 return 0; /*notreached*/
1736 /* Figure out the number of (insn-stream) bytes constituting the amode
1737 beginning at delta. Is useful for getting hold of literals beyond
1738 the end of the amode before it has been disassembled. */
/* Return the number of instruction-stream bytes occupied by the amode
   starting at 'delta', without emitting any IR.  Mirrors the case
   structure of disAMode; useful for reaching literals that follow the
   amode before it has been disassembled.
   NOTE(review): the per-case 'return N;' lines for several arms appear
   elided by extraction -- verify against upstream. */
1740 static UInt
lengthAMode ( Int delta
)
1742 UChar mod_reg_rm
= getIByte(delta
); delta
++;
1744 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
1745 jump table seems a bit excessive.
1747 mod_reg_rm
&= 0xC7; /* is now XX000YYY */
1748 mod_reg_rm
= toUChar(mod_reg_rm
| (mod_reg_rm
>> 3));
1749 /* is now XX0XXYYY */
1750 mod_reg_rm
&= 0x1F; /* is now 000XXYYY */
1751 switch (mod_reg_rm
) {
1753 /* (%eax) .. (%edi), not including (%esp) or (%ebp). */
1754 case 0x00: case 0x01: case 0x02: case 0x03:
1755 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
1758 /* d8(%eax) ... d8(%edi), not including d8(%esp). */
1759 case 0x08: case 0x09: case 0x0A: case 0x0B:
1760 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
1763 /* d32(%eax) ... d32(%edi), not including d32(%esp). */
1764 case 0x10: case 0x11: case 0x12: case 0x13:
1765 /* ! 14 */ case 0x15: case 0x16: case 0x17:
1768 /* a register, %eax .. %edi. (Not an addr, but still handled.) */
1769 case 0x18: case 0x19: case 0x1A: case 0x1B:
1770 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
1773 /* a 32-bit literal address. */
1774 case 0x05: return 5;
1776 /* SIB, no displacement. */
1778 UChar sib
= getIByte(delta
);
1779 UChar base_r
= toUChar(sib
& 7);
/* base == %ebp means a d32 follows the SIB byte. */
1780 if (base_r
== R_EBP
) return 6; else return 2;
1782 /* SIB, with 8-bit displacement. */
1783 case 0x0C: return 3;
1785 /* SIB, with 32-bit displacement. */
1786 case 0x14: return 6;
1789 vpanic("lengthAMode");
1790 return 0; /*notreached*/
1794 /*------------------------------------------------------------*/
1795 /*--- Disassembling common idioms ---*/
1796 /*------------------------------------------------------------*/
1798 /* Handle binary integer instructions of the form
1801 Is passed the a ptr to the modRM byte, the actual operation, and the
1802 data size. Returns the address advanced completely over this
1805 E(src) is reg-or-mem
1808 If E is reg, --> GET %G, tmp
1812 If E is mem and OP is not reversible,
1813 --> (getAddr E) -> tmpa
1819 If E is mem and OP is reversible
1820 --> (getAddr E) -> tmpa
/* Disassemble a binary op of the form "op E, G" (E = reg-or-mem
   source, G = register destination).  Emits IR for the operation and
   its flags thunk, writes the result into G, and returns delta
   advanced past the instruction.  addSubCarry selects ADC/SBB handling
   via helper_ADC/helper_SBB.  keep==False (CMP/TEST-style) skips the
   writeback (visible as the guarded putIReg calls).
   NOTE(review): several scaffolding lines (locals, returns, else
   branches) appear elided by extraction -- verify against upstream. */
1826 UInt
dis_op2_E_G ( UChar sorb
,
1832 const HChar
* t_x86opc
)
1836 IRType ty
= szToITy(size
);
1837 IRTemp dst1
= newTemp(ty
);
1838 IRTemp src
= newTemp(ty
);
1839 IRTemp dst0
= newTemp(ty
);
1840 UChar rm
= getUChar(delta0
);
1841 IRTemp addr
= IRTemp_INVALID
;
1843 /* addSubCarry == True indicates the intended operation is
1844 add-with-carry or subtract-with-borrow. */
1846 vassert(op8
== Iop_Add8
|| op8
== Iop_Sub8
);
/* Register-to-register form. */
1850 if (epartIsReg(rm
)) {
1851 /* Specially handle XOR reg,reg, because that doesn't really
1852 depend on reg, and doing the obvious thing potentially
1853 generates a spurious value check failure due to the bogus
1854 dependency. Ditto SBB reg,reg. */
1855 if ((op8
== Iop_Xor8
|| (op8
== Iop_Sub8
&& addSubCarry
))
1856 && gregOfRM(rm
) == eregOfRM(rm
)) {
1857 putIReg(size
, gregOfRM(rm
), mkU(ty
,0));
1859 assign( dst0
, getIReg(size
,gregOfRM(rm
)) );
1860 assign( src
, getIReg(size
,eregOfRM(rm
)) );
1862 if (addSubCarry
&& op8
== Iop_Add8
) {
1863 helper_ADC( size
, dst1
, dst0
, src
,
1864 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1865 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1867 if (addSubCarry
&& op8
== Iop_Sub8
) {
1868 helper_SBB( size
, dst1
, dst0
, src
,
1869 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1870 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
/* Plain (non-carrying) op: compute and set flags directly. */
1872 assign( dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
1874 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
1876 setFlags_DEP1(op8
, dst1
, ty
);
1878 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1881 DIP("%s%c %s,%s\n", t_x86opc
, nameISize(size
),
1882 nameIReg(size
,eregOfRM(rm
)),
1883 nameIReg(size
,gregOfRM(rm
)));
1886 /* E refers to memory */
1887 addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
1888 assign( dst0
, getIReg(size
,gregOfRM(rm
)) );
1889 assign( src
, loadLE(szToITy(size
), mkexpr(addr
)) );
1891 if (addSubCarry
&& op8
== Iop_Add8
) {
1892 helper_ADC( size
, dst1
, dst0
, src
,
1893 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1894 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1896 if (addSubCarry
&& op8
== Iop_Sub8
) {
1897 helper_SBB( size
, dst1
, dst0
, src
,
1898 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1899 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
/* Plain (non-carrying) op, memory-source form. */
1901 assign( dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
1903 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
1905 setFlags_DEP1(op8
, dst1
, ty
);
1907 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1910 DIP("%s%c %s,%s\n", t_x86opc
, nameISize(size
),
1911 dis_buf
,nameIReg(size
,gregOfRM(rm
)));
1918 /* Handle binary integer instructions of the form
1921 Is passed the a ptr to the modRM byte, the actual operation, and the
1922 data size. Returns the address advanced completely over this
1926 E(dst) is reg-or-mem
1928 If E is reg, --> GET %E, tmp
1932 If E is mem, --> (getAddr E) -> tmpa
/* Disassemble a binary op of the form "op G, E" (G = register source,
   E = reg-or-mem destination).  Emits IR for the op and its flags
   thunk, writes the result back to E (guarded for CMP/TEST-style
   no-writeback), and returns delta advanced past the instruction.
   For a memory destination with a LOCK prefix, the store is done as a
   cas-with-restart (casLE / helper_* with expVal) so the operation is
   atomic.
   NOTE(review): several scaffolding lines (locals, returns, else/if
   (locked) headers) appear elided by extraction -- verify upstream. */
1938 UInt
dis_op2_G_E ( UChar sorb
,
1945 const HChar
* t_x86opc
)
1949 IRType ty
= szToITy(size
);
1950 IRTemp dst1
= newTemp(ty
);
1951 IRTemp src
= newTemp(ty
);
1952 IRTemp dst0
= newTemp(ty
);
1953 UChar rm
= getIByte(delta0
);
1954 IRTemp addr
= IRTemp_INVALID
;
1956 /* addSubCarry == True indicates the intended operation is
1957 add-with-carry or subtract-with-borrow. */
1959 vassert(op8
== Iop_Add8
|| op8
== Iop_Sub8
);
/* Register-destination form. */
1963 if (epartIsReg(rm
)) {
1964 /* Specially handle XOR reg,reg, because that doesn't really
1965 depend on reg, and doing the obvious thing potentially
1966 generates a spurious value check failure due to the bogus
1967 dependency. Ditto SBB reg,reg.*/
1968 if ((op8
== Iop_Xor8
|| (op8
== Iop_Sub8
&& addSubCarry
))
1969 && gregOfRM(rm
) == eregOfRM(rm
)) {
1970 putIReg(size
, eregOfRM(rm
), mkU(ty
,0));
1972 assign(dst0
, getIReg(size
,eregOfRM(rm
)));
1973 assign(src
, getIReg(size
,gregOfRM(rm
)));
1975 if (addSubCarry
&& op8
== Iop_Add8
) {
1976 helper_ADC( size
, dst1
, dst0
, src
,
1977 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1978 putIReg(size
, eregOfRM(rm
), mkexpr(dst1
));
1980 if (addSubCarry
&& op8
== Iop_Sub8
) {
1981 helper_SBB( size
, dst1
, dst0
, src
,
1982 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1983 putIReg(size
, eregOfRM(rm
), mkexpr(dst1
));
/* Plain (non-carrying) op. */
1985 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
1987 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
1989 setFlags_DEP1(op8
, dst1
, ty
);
1991 putIReg(size
, eregOfRM(rm
), mkexpr(dst1
));
1994 DIP("%s%c %s,%s\n", t_x86opc
, nameISize(size
),
1995 nameIReg(size
,gregOfRM(rm
)),
1996 nameIReg(size
,eregOfRM(rm
)));
2000 /* E refers to memory */
2002 addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
2003 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
2004 assign(src
, getIReg(size
,gregOfRM(rm
)));
2006 if (addSubCarry
&& op8
== Iop_Add8
) {
2008 /* cas-style store */
2009 helper_ADC( size
, dst1
, dst0
, src
,
2010 /*store*/addr
, dst0
/*expVal*/, guest_EIP_curr_instr
);
/* Non-locked: plain store inside the helper. */
2013 helper_ADC( size
, dst1
, dst0
, src
,
2014 /*store*/addr
, IRTemp_INVALID
, 0 );
2017 if (addSubCarry
&& op8
== Iop_Sub8
) {
2019 /* cas-style store */
2020 helper_SBB( size
, dst1
, dst0
, src
,
2021 /*store*/addr
, dst0
/*expVal*/, guest_EIP_curr_instr
);
/* Non-locked: plain store inside the helper. */
2024 helper_SBB( size
, dst1
, dst0
, src
,
2025 /*store*/addr
, IRTemp_INVALID
, 0 );
/* Plain (non-carrying) op: compute, then store (cas if locked). */
2028 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
2031 if (0) vex_printf("locked case\n" );
2032 casLE( mkexpr(addr
),
2033 mkexpr(dst0
)/*expval*/,
2034 mkexpr(dst1
)/*newval*/, guest_EIP_curr_instr
);
2036 if (0) vex_printf("nonlocked case\n");
2037 storeLE(mkexpr(addr
), mkexpr(dst1
));
2041 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
2043 setFlags_DEP1(op8
, dst1
, ty
);
2046 DIP("%s%c %s,%s\n", t_x86opc
, nameISize(size
),
2047 nameIReg(size
,gregOfRM(rm
)), dis_buf
);
2053 /* Handle move instructions of the form
2056 Is passed the a ptr to the modRM byte, and the data size. Returns
2057 the address advanced completely over this instruction.
2059 E(src) is reg-or-mem
2062 If E is reg, --> GET %E, tmpv
2065 If E is mem --> (getAddr E) -> tmpa
/* Disassemble "mov E, G" (E = reg-or-mem source, G = register
   destination).  No flags are affected.  Returns delta advanced past
   the instruction.
   NOTE(review): locals and the return statements appear elided by
   extraction -- verify against upstream. */
2070 UInt
dis_mov_E_G ( UChar sorb
,
2075 UChar rm
= getIByte(delta0
);
/* Register-to-register move. */
2078 if (epartIsReg(rm
)) {
2079 putIReg(size
, gregOfRM(rm
), getIReg(size
, eregOfRM(rm
)));
2080 DIP("mov%c %s,%s\n", nameISize(size
),
2081 nameIReg(size
,eregOfRM(rm
)),
2082 nameIReg(size
,gregOfRM(rm
)));
2086 /* E refers to memory */
2088 IRTemp addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
2089 putIReg(size
, gregOfRM(rm
), loadLE(szToITy(size
), mkexpr(addr
)));
2090 DIP("mov%c %s,%s\n", nameISize(size
),
2091 dis_buf
,nameIReg(size
,gregOfRM(rm
)));
2097 /* Handle move instructions of the form
2100 Is passed the a ptr to the modRM byte, and the data size. Returns
2101 the address advanced completely over this instruction.
2104 E(dst) is reg-or-mem
2106 If E is reg, --> GET %G, tmp
2109 If E is mem, --> (getAddr E) -> tmpa
/* Disassemble "mov G, E" (G = register source, E = reg-or-mem
   destination).  No flags are affected.  Returns delta advanced past
   the instruction.
   NOTE(review): locals and the return statements appear elided by
   extraction -- verify against upstream. */
2114 UInt
dis_mov_G_E ( UChar sorb
,
2119 UChar rm
= getIByte(delta0
);
/* Register-to-register move. */
2122 if (epartIsReg(rm
)) {
2123 putIReg(size
, eregOfRM(rm
), getIReg(size
, gregOfRM(rm
)));
2124 DIP("mov%c %s,%s\n", nameISize(size
),
2125 nameIReg(size
,gregOfRM(rm
)),
2126 nameIReg(size
,eregOfRM(rm
)));
2130 /* E refers to memory */
2132 IRTemp addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
2133 storeLE( mkexpr(addr
), getIReg(size
, gregOfRM(rm
)) );
2134 DIP("mov%c %s,%s\n", nameISize(size
),
2135 nameIReg(size
,gregOfRM(rm
)), dis_buf
);
2141 /* op $immediate, AL/AX/EAX. */
/* Disassemble "op $imm, AL/AX/EAX": applies op8 (with optional carry,
   selecting helper_ADC/helper_SBB) between the accumulator and an
   immediate, sets the flags thunk, and (except in the CMP/TEST-style
   no-writeback case guarded around the final putIReg) writes the
   result back to the accumulator.
   NOTE(review): some control-flow scaffolding appears elided by
   extraction -- verify against upstream. */
2143 UInt
dis_op_imm_A ( Int size
,
2148 const HChar
* t_x86opc
)
2150 IRType ty
= szToITy(size
);
2151 IRTemp dst0
= newTemp(ty
);
2152 IRTemp src
= newTemp(ty
);
2153 IRTemp dst1
= newTemp(ty
);
/* Fetch the immediate at the current decode point. */
2154 UInt lit
= getUDisp(size
,delta
);
2155 assign(dst0
, getIReg(size
,R_EAX
));
2156 assign(src
, mkU(ty
,lit
));
/* Plain add/sub: two-dep flags thunk. */
2158 if (isAddSub(op8
) && !carrying
) {
2159 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
2160 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
/* Logic ops: one-dep flags thunk. */
2165 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
2166 setFlags_DEP1(op8
, dst1
, ty
);
2169 if (op8
== Iop_Add8
&& carrying
) {
2170 helper_ADC( size
, dst1
, dst0
, src
,
2171 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
2174 if (op8
== Iop_Sub8
&& carrying
) {
2175 helper_SBB( size
, dst1
, dst0
, src
,
2176 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
/* Unexpected op/carry combination. */
2179 vpanic("dis_op_imm_A(x86,guest)");
2182 putIReg(size
, R_EAX
, mkexpr(dst1
));
2184 DIP("%s%c $0x%x, %s\n", t_x86opc
, nameISize(size
),
2185 lit
, nameIReg(size
,R_EAX
));
2190 /* Sign- and Zero-extending moves. */
/* Disassemble MOVSX/MOVZX: widen the szs-sized E operand to szd bytes
   (sign- or zero-extending per 'sign_extend') into register G.  The
   szs==szd branches handle a mutant same-size encoding (see #250799)
   as a plain move.  Returns delta advanced past the instruction.
   NOTE(review): some scaffolding lines (if headers, returns) appear
   elided by extraction -- verify against upstream. */
2192 UInt
dis_movx_E_G ( UChar sorb
,
2193 Int delta
, Int szs
, Int szd
, Bool sign_extend
)
2195 UChar rm
= getIByte(delta
);
2196 if (epartIsReg(rm
)) {
2198 // mutant case. See #250799
2199 putIReg(szd
, gregOfRM(rm
),
2200 getIReg(szs
,eregOfRM(rm
)));
/* Normal case: widen via the appropriate unop. */
2203 putIReg(szd
, gregOfRM(rm
),
2204 unop(mkWidenOp(szs
,szd
,sign_extend
),
2205 getIReg(szs
,eregOfRM(rm
))));
2207 DIP("mov%c%c%c %s,%s\n", sign_extend
? 's' : 'z',
2208 nameISize(szs
), nameISize(szd
),
2209 nameIReg(szs
,eregOfRM(rm
)),
2210 nameIReg(szd
,gregOfRM(rm
)));
2214 /* E refers to memory */
2218 IRTemp addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
2220 // mutant case. See #250799
2221 putIReg(szd
, gregOfRM(rm
),
2222 loadLE(szToITy(szs
),mkexpr(addr
)));
/* Normal case: widen the loaded value. */
2225 putIReg(szd
, gregOfRM(rm
),
2226 unop(mkWidenOp(szs
,szd
,sign_extend
),
2227 loadLE(szToITy(szs
),mkexpr(addr
))));
2229 DIP("mov%c%c%c %s,%s\n", sign_extend
? 's' : 'z',
2230 nameISize(szs
), nameISize(szd
),
2231 dis_buf
, nameIReg(szd
,gregOfRM(rm
)));
2237 /* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 /
2238 16 / 8 bit quantity in the given IRTemp. */
/* Emit IR for DIV/IDIV: divide EDX:EAX (sz==4), DX:AX (sz==2) or AX
   (sz==1) by the sz-sized value in 't', writing quotient and
   remainder back to the architecturally defined register halves.
   signed_divide selects the signed IR ops.
   NOTE(review): the size-dispatch switch headers and some lines appear
   elided by extraction -- verify against upstream. */
2240 void codegen_div ( Int sz
, IRTemp t
, Bool signed_divide
)
2242 IROp op
= signed_divide
? Iop_DivModS64to32
: Iop_DivModU64to32
;
2243 IRTemp src64
= newTemp(Ity_I64
);
2244 IRTemp dst64
= newTemp(Ity_I64
);
/* sz == 4: EDX:EAX / t32 -> EAX (quot), EDX (rem). */
2247 assign( src64
, binop(Iop_32HLto64
,
2248 getIReg(4,R_EDX
), getIReg(4,R_EAX
)) );
2249 assign( dst64
, binop(op
, mkexpr(src64
), mkexpr(t
)) );
2250 putIReg( 4, R_EAX
, unop(Iop_64to32
,mkexpr(dst64
)) );
2251 putIReg( 4, R_EDX
, unop(Iop_64HIto32
,mkexpr(dst64
)) );
/* sz == 2: DX:AX / t16 -> AX (quot), DX (rem), via 64-bit divide. */
2254 IROp widen3264
= signed_divide
? Iop_32Sto64
: Iop_32Uto64
;
2255 IROp widen1632
= signed_divide
? Iop_16Sto32
: Iop_16Uto32
;
2256 assign( src64
, unop(widen3264
,
2258 getIReg(2,R_EDX
), getIReg(2,R_EAX
))) );
2259 assign( dst64
, binop(op
, mkexpr(src64
), unop(widen1632
,mkexpr(t
))) );
2260 putIReg( 2, R_EAX
, unop(Iop_32to16
,unop(Iop_64to32
,mkexpr(dst64
))) );
2261 putIReg( 2, R_EDX
, unop(Iop_32to16
,unop(Iop_64HIto32
,mkexpr(dst64
))) );
/* sz == 1: AX / t8 -> AL (quot), AH (rem), via 64-bit divide. */
2265 IROp widen3264
= signed_divide
? Iop_32Sto64
: Iop_32Uto64
;
2266 IROp widen1632
= signed_divide
? Iop_16Sto32
: Iop_16Uto32
;
2267 IROp widen816
= signed_divide
? Iop_8Sto16
: Iop_8Uto16
;
2268 assign( src64
, unop(widen3264
, unop(widen1632
, getIReg(2,R_EAX
))) );
2270 binop(op
, mkexpr(src64
),
2271 unop(widen1632
, unop(widen816
, mkexpr(t
)))) );
2272 putIReg( 1, R_AL
, unop(Iop_16to8
, unop(Iop_32to16
,
2273 unop(Iop_64to32
,mkexpr(dst64
)))) );
2274 putIReg( 1, R_AH
, unop(Iop_16to8
, unop(Iop_32to16
,
2275 unop(Iop_64HIto32
,mkexpr(dst64
)))) );
2278 default: vpanic("codegen_div(x86)");
/* Disassemble a Grp1 instruction: "op $imm, E" where the /reg field
   of the modrm byte selects add/or/adc/sbb/and/sub/xor/cmp.  The
   immediate is masked to the operand size.  CMP (/7) computes flags
   but skips the writeback (the gregOfRM(modrm) < 7 guards).  A LOCK
   prefix on the memory form uses cas-style stores for atomicity.
   Returns delta advanced past the instruction.
   NOTE(review): some scaffolding lines (switch headers, else branches,
   returns) appear elided by extraction -- verify against upstream. */
2284 UInt
dis_Grp1 ( UChar sorb
, Bool locked
,
2285 Int delta
, UChar modrm
,
2286 Int am_sz
, Int d_sz
, Int sz
, UInt d32
)
2290 IRType ty
= szToITy(sz
);
2291 IRTemp dst1
= newTemp(ty
);
2292 IRTemp src
= newTemp(ty
);
2293 IRTemp dst0
= newTemp(ty
);
2294 IRTemp addr
= IRTemp_INVALID
;
2295 IROp op8
= Iop_INVALID
;
/* Mask for truncating the immediate to the operand size. */
2296 UInt mask
= sz
==1 ? 0xFF : (sz
==2 ? 0xFFFF : 0xFFFFFFFF);
/* Map the /reg field to the 8-bit base IROp; ADC/SBB are handled
   separately via helper_ADC/helper_SBB below. */
2298 switch (gregOfRM(modrm
)) {
2299 case 0: op8
= Iop_Add8
; break; case 1: op8
= Iop_Or8
; break;
2300 case 2: break; // ADC
2301 case 3: break; // SBB
2302 case 4: op8
= Iop_And8
; break; case 5: op8
= Iop_Sub8
; break;
2303 case 6: op8
= Iop_Xor8
; break; case 7: op8
= Iop_Sub8
; break;
2305 default: vpanic("dis_Grp1: unhandled case");
/* Register-destination form. */
2308 if (epartIsReg(modrm
)) {
2309 vassert(am_sz
== 1);
2311 assign(dst0
, getIReg(sz
,eregOfRM(modrm
)));
2312 assign(src
, mkU(ty
,d32
& mask
));
2314 if (gregOfRM(modrm
) == 2 /* ADC */) {
2315 helper_ADC( sz
, dst1
, dst0
, src
,
2316 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
2318 if (gregOfRM(modrm
) == 3 /* SBB */) {
2319 helper_SBB( sz
, dst1
, dst0
, src
,
2320 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
/* Plain op: compute and set flags. */
2322 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
2324 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
2326 setFlags_DEP1(op8
, dst1
, ty
);
/* CMP (/7) does not write the result back. */
2329 if (gregOfRM(modrm
) < 7)
2330 putIReg(sz
, eregOfRM(modrm
), mkexpr(dst1
));
2332 delta
+= (am_sz
+ d_sz
);
2333 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm
)), nameISize(sz
), d32
,
2334 nameIReg(sz
,eregOfRM(modrm
)));
/* Memory-destination form. */
2336 addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
2338 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
2339 assign(src
, mkU(ty
,d32
& mask
));
2341 if (gregOfRM(modrm
) == 2 /* ADC */) {
2343 /* cas-style store */
2344 helper_ADC( sz
, dst1
, dst0
, src
,
2345 /*store*/addr
, dst0
/*expVal*/, guest_EIP_curr_instr
);
/* Non-locked: plain store inside the helper. */
2348 helper_ADC( sz
, dst1
, dst0
, src
,
2349 /*store*/addr
, IRTemp_INVALID
, 0 );
2352 if (gregOfRM(modrm
) == 3 /* SBB */) {
2354 /* cas-style store */
2355 helper_SBB( sz
, dst1
, dst0
, src
,
2356 /*store*/addr
, dst0
/*expVal*/, guest_EIP_curr_instr
);
/* Non-locked: plain store inside the helper. */
2359 helper_SBB( sz
, dst1
, dst0
, src
,
2360 /*store*/addr
, IRTemp_INVALID
, 0 );
/* Plain op: compute, then (unless CMP) store, cas-style if locked. */
2363 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
2364 if (gregOfRM(modrm
) < 7) {
2366 casLE( mkexpr(addr
), mkexpr(dst0
)/*expVal*/,
2367 mkexpr(dst1
)/*newVal*/,
2368 guest_EIP_curr_instr
);
2370 storeLE(mkexpr(addr
), mkexpr(dst1
));
2374 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
2376 setFlags_DEP1(op8
, dst1
, ty
);
2379 delta
+= (len
+d_sz
);
2380 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm
)), nameISize(sz
),
2387 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
2391 UInt
dis_Grp2 ( UChar sorb
,
2392 Int delta
, UChar modrm
,
2393 Int am_sz
, Int d_sz
, Int sz
, IRExpr
* shift_expr
,
2394 const HChar
* shift_expr_txt
, Bool
* decode_OK
)
2396 /* delta on entry points at the modrm byte. */
2399 Bool isShift
, isRotate
, isRotateC
;
2400 IRType ty
= szToITy(sz
);
2401 IRTemp dst0
= newTemp(ty
);
2402 IRTemp dst1
= newTemp(ty
);
2403 IRTemp addr
= IRTemp_INVALID
;
2407 vassert(sz
== 1 || sz
== 2 || sz
== 4);
2409 /* Put value to shift/rotate in dst0. */
2410 if (epartIsReg(modrm
)) {
2411 assign(dst0
, getIReg(sz
, eregOfRM(modrm
)));
2412 delta
+= (am_sz
+ d_sz
);
2414 addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
2415 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
2416 delta
+= len
+ d_sz
;
2420 switch (gregOfRM(modrm
)) { case 4: case 5: case 6: case 7: isShift
= True
; }
2423 switch (gregOfRM(modrm
)) { case 0: case 1: isRotate
= True
; }
2426 switch (gregOfRM(modrm
)) { case 2: case 3: isRotateC
= True
; }
2428 if (!isShift
&& !isRotate
&& !isRotateC
) {
2430 vpanic("dis_Grp2(Reg): unhandled case(x86)");
2434 /* call a helper; these insns are so ridiculous they do not
2436 Bool left
= toBool(gregOfRM(modrm
) == 2);
2437 IRTemp r64
= newTemp(Ity_I64
);
2439 = mkIRExprVec_4( widenUto32(mkexpr(dst0
)), /* thing to rotate */
2440 widenUto32(shift_expr
), /* rotate amount */
2441 widenUto32(mk_x86g_calculate_eflags_all()),
2443 assign( r64
, mkIRExprCCall(
2446 left
? "x86g_calculate_RCL" : "x86g_calculate_RCR",
2447 left
? &x86g_calculate_RCL
: &x86g_calculate_RCR
,
2451 /* new eflags in hi half r64; new value in lo half r64 */
2452 assign( dst1
, narrowTo(ty
, unop(Iop_64to32
, mkexpr(r64
))) );
2453 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
2454 stmt( IRStmt_Put( OFFB_CC_DEP1
, unop(Iop_64HIto32
, mkexpr(r64
)) ));
2455 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
2456 /* Set NDEP even though it isn't used. This makes redundant-PUT
2457 elimination of previous stores to this field work better. */
2458 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
2463 IRTemp pre32
= newTemp(Ity_I32
);
2464 IRTemp res32
= newTemp(Ity_I32
);
2465 IRTemp res32ss
= newTemp(Ity_I32
);
2466 IRTemp shift_amt
= newTemp(Ity_I8
);
2469 switch (gregOfRM(modrm
)) {
2470 case 4: op32
= Iop_Shl32
; break;
2471 case 5: op32
= Iop_Shr32
; break;
2472 case 6: op32
= Iop_Shl32
; break;
2473 case 7: op32
= Iop_Sar32
; break;
2475 default: vpanic("dis_Grp2:shift"); break;
2478 /* Widen the value to be shifted to 32 bits, do the shift, and
2479 narrow back down. This seems surprisingly long-winded, but
2480 unfortunately the Intel semantics requires that 8/16-bit
2481 shifts give defined results for shift values all the way up
2482 to 31, and this seems the simplest way to do it. It has the
2483 advantage that the only IR level shifts generated are of 32
2484 bit values, and the shift amount is guaranteed to be in the
2485 range 0 .. 31, thereby observing the IR semantics requiring
2486 all shift values to be in the range 0 .. 2^word_size-1. */
2488 /* shift_amt = shift_expr & 31, regardless of operation size */
2489 assign( shift_amt
, binop(Iop_And8
, shift_expr
, mkU8(31)) );
2491 /* suitably widen the value to be shifted to 32 bits. */
2492 assign( pre32
, op32
==Iop_Sar32
? widenSto32(mkexpr(dst0
))
2493 : widenUto32(mkexpr(dst0
)) );
2495 /* res32 = pre32 `shift` shift_amt */
2496 assign( res32
, binop(op32
, mkexpr(pre32
), mkexpr(shift_amt
)) );
2498 /* res32ss = pre32 `shift` ((shift_amt - 1) & 31) */
2504 mkexpr(shift_amt
), mkU8(1)),
2507 /* Build the flags thunk. */
2508 setFlags_DEP1_DEP2_shift(op32
, res32
, res32ss
, ty
, shift_amt
);
2510 /* Narrow the result back down. */
2511 assign( dst1
, narrowTo(ty
, mkexpr(res32
)) );
2513 } /* if (isShift) */
2517 Int ccOp
= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1 : 2);
2518 Bool left
= toBool(gregOfRM(modrm
) == 0);
2519 IRTemp rot_amt
= newTemp(Ity_I8
);
2520 IRTemp rot_amt32
= newTemp(Ity_I8
);
2521 IRTemp oldFlags
= newTemp(Ity_I32
);
2523 /* rot_amt = shift_expr & mask */
2524 /* By masking the rotate amount thusly, the IR-level Shl/Shr
2525 expressions never shift beyond the word size and thus remain
2527 assign(rot_amt32
, binop(Iop_And8
, shift_expr
, mkU8(31)));
2530 assign(rot_amt
, mkexpr(rot_amt32
));
2532 assign(rot_amt
, binop(Iop_And8
, mkexpr(rot_amt32
), mkU8(8*sz
-1)));
2536 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
2538 binop( mkSizedOp(ty
,Iop_Or8
),
2539 binop( mkSizedOp(ty
,Iop_Shl8
),
2543 binop( mkSizedOp(ty
,Iop_Shr8
),
2545 binop(Iop_Sub8
,mkU8(8*sz
), mkexpr(rot_amt
))
2549 ccOp
+= X86G_CC_OP_ROLB
;
2551 } else { /* right */
2553 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
2555 binop( mkSizedOp(ty
,Iop_Or8
),
2556 binop( mkSizedOp(ty
,Iop_Shr8
),
2560 binop( mkSizedOp(ty
,Iop_Shl8
),
2562 binop(Iop_Sub8
,mkU8(8*sz
), mkexpr(rot_amt
))
2566 ccOp
+= X86G_CC_OP_RORB
;
2570 /* dst1 now holds the rotated value. Build flag thunk. We
2571 need the resulting value for this, and the previous flags.
2572 Except don't set it if the rotate count is zero. */
2574 assign(oldFlags
, mk_x86g_calculate_eflags_all());
2576 /* rot_amt32 :: Ity_I8. We need to convert it to I1. */
2577 IRTemp rot_amt32b
= newTemp(Ity_I1
);
2578 assign(rot_amt32b
, binop(Iop_CmpNE8
, mkexpr(rot_amt32
), mkU8(0)) );
2580 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
2581 stmt( IRStmt_Put( OFFB_CC_OP
,
2582 IRExpr_ITE( mkexpr(rot_amt32b
),
2584 IRExpr_Get(OFFB_CC_OP
,Ity_I32
) ) ));
2585 stmt( IRStmt_Put( OFFB_CC_DEP1
,
2586 IRExpr_ITE( mkexpr(rot_amt32b
),
2587 widenUto32(mkexpr(dst1
)),
2588 IRExpr_Get(OFFB_CC_DEP1
,Ity_I32
) ) ));
2589 stmt( IRStmt_Put( OFFB_CC_DEP2
,
2590 IRExpr_ITE( mkexpr(rot_amt32b
),
2592 IRExpr_Get(OFFB_CC_DEP2
,Ity_I32
) ) ));
2593 stmt( IRStmt_Put( OFFB_CC_NDEP
,
2594 IRExpr_ITE( mkexpr(rot_amt32b
),
2596 IRExpr_Get(OFFB_CC_NDEP
,Ity_I32
) ) ));
2597 } /* if (isRotate) */
2599 /* Save result, and finish up. */
2600 if (epartIsReg(modrm
)) {
2601 putIReg(sz
, eregOfRM(modrm
), mkexpr(dst1
));
2602 if (vex_traceflags
& VEX_TRACE_FE
) {
2604 nameGrp2(gregOfRM(modrm
)), nameISize(sz
) );
2606 vex_printf("%s", shift_expr_txt
);
2608 ppIRExpr(shift_expr
);
2609 vex_printf(", %s\n", nameIReg(sz
,eregOfRM(modrm
)));
2612 storeLE(mkexpr(addr
), mkexpr(dst1
));
2613 if (vex_traceflags
& VEX_TRACE_FE
) {
2615 nameGrp2(gregOfRM(modrm
)), nameISize(sz
) );
2617 vex_printf("%s", shift_expr_txt
);
2619 ppIRExpr(shift_expr
);
2620 vex_printf(", %s\n", dis_buf
);
2627 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
2629 UInt
dis_Grp8_Imm ( UChar sorb
,
2631 Int delta
, UChar modrm
,
2632 Int am_sz
, Int sz
, UInt src_val
,
2635 /* src_val denotes a d8.
2636 And delta on entry points at the modrm byte. */
2638 IRType ty
= szToITy(sz
);
2639 IRTemp t2
= newTemp(Ity_I32
);
2640 IRTemp t2m
= newTemp(Ity_I32
);
2641 IRTemp t_addr
= IRTemp_INVALID
;
2645 /* we're optimists :-) */
2648 /* Limit src_val -- the bit offset -- to something within a word.
2649 The Intel docs say that literal offsets larger than a word are
2650 masked in this way. */
2652 case 2: src_val
&= 15; break;
2653 case 4: src_val
&= 31; break;
2654 default: *decode_OK
= False
; return delta
;
2657 /* Invent a mask suitable for the operation. */
2658 switch (gregOfRM(modrm
)) {
2659 case 4: /* BT */ mask
= 0; break;
2660 case 5: /* BTS */ mask
= 1 << src_val
; break;
2661 case 6: /* BTR */ mask
= ~(1 << src_val
); break;
2662 case 7: /* BTC */ mask
= 1 << src_val
; break;
2663 /* If this needs to be extended, probably simplest to make a
2664 new function to handle the other cases (0 .. 3). The
2665 Intel docs do however not indicate any use for 0 .. 3, so
2666 we don't expect this to happen. */
2667 default: *decode_OK
= False
; return delta
;
2670 /* Fetch the value to be tested and modified into t2, which is
2671 32-bits wide regardless of sz. */
2672 if (epartIsReg(modrm
)) {
2673 vassert(am_sz
== 1);
2674 assign( t2
, widenUto32(getIReg(sz
, eregOfRM(modrm
))) );
2675 delta
+= (am_sz
+ 1);
2676 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm
)), nameISize(sz
),
2677 src_val
, nameIReg(sz
,eregOfRM(modrm
)));
2680 t_addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
2682 assign( t2
, widenUto32(loadLE(ty
, mkexpr(t_addr
))) );
2683 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm
)), nameISize(sz
),
2687 /* Compute the new value into t2m, if non-BT. */
2688 switch (gregOfRM(modrm
)) {
2692 assign( t2m
, binop(Iop_Or32
, mkU32(mask
), mkexpr(t2
)) );
2695 assign( t2m
, binop(Iop_And32
, mkU32(mask
), mkexpr(t2
)) );
2698 assign( t2m
, binop(Iop_Xor32
, mkU32(mask
), mkexpr(t2
)) );
2701 /*NOTREACHED*/ /*the previous switch guards this*/
2705 /* Write the result back, if non-BT. If the CAS fails then we
2706 side-exit from the trace at this point, and so the flag state is
2707 not affected. This is of course as required. */
2708 if (gregOfRM(modrm
) != 4 /* BT */) {
2709 if (epartIsReg(modrm
)) {
2710 putIReg(sz
, eregOfRM(modrm
), narrowTo(ty
, mkexpr(t2m
)));
2713 casLE( mkexpr(t_addr
),
2714 narrowTo(ty
, mkexpr(t2
))/*expd*/,
2715 narrowTo(ty
, mkexpr(t2m
))/*new*/,
2716 guest_EIP_curr_instr
);
2718 storeLE(mkexpr(t_addr
), narrowTo(ty
, mkexpr(t2m
)));
2723 /* Copy relevant bit from t2 into the carry flag. */
2724 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
2725 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
2726 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
2730 binop(Iop_Shr32
, mkexpr(t2
), mkU8(src_val
)),
2733 /* Set NDEP even though it isn't used. This makes redundant-PUT
2734 elimination of previous stores to this field work better. */
2735 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
2741 /* Signed/unsigned widening multiply. Generate IR to multiply the
2742 value in EAX/AX/AL by the given IRTemp, and park the result in
2745 static void codegen_mulL_A_D ( Int sz
, Bool syned
,
2746 IRTemp tmp
, const HChar
* tmp_txt
)
2748 IRType ty
= szToITy(sz
);
2749 IRTemp t1
= newTemp(ty
);
2751 assign( t1
, getIReg(sz
, R_EAX
) );
2755 IRTemp res64
= newTemp(Ity_I64
);
2756 IRTemp resHi
= newTemp(Ity_I32
);
2757 IRTemp resLo
= newTemp(Ity_I32
);
2758 IROp mulOp
= syned
? Iop_MullS32
: Iop_MullU32
;
2759 UInt tBaseOp
= syned
? X86G_CC_OP_SMULB
: X86G_CC_OP_UMULB
;
2760 setFlags_MUL ( Ity_I32
, t1
, tmp
, tBaseOp
);
2761 assign( res64
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
2762 assign( resHi
, unop(Iop_64HIto32
,mkexpr(res64
)));
2763 assign( resLo
, unop(Iop_64to32
,mkexpr(res64
)));
2764 putIReg(4, R_EDX
, mkexpr(resHi
));
2765 putIReg(4, R_EAX
, mkexpr(resLo
));
2769 IRTemp res32
= newTemp(Ity_I32
);
2770 IRTemp resHi
= newTemp(Ity_I16
);
2771 IRTemp resLo
= newTemp(Ity_I16
);
2772 IROp mulOp
= syned
? Iop_MullS16
: Iop_MullU16
;
2773 UInt tBaseOp
= syned
? X86G_CC_OP_SMULB
: X86G_CC_OP_UMULB
;
2774 setFlags_MUL ( Ity_I16
, t1
, tmp
, tBaseOp
);
2775 assign( res32
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
2776 assign( resHi
, unop(Iop_32HIto16
,mkexpr(res32
)));
2777 assign( resLo
, unop(Iop_32to16
,mkexpr(res32
)));
2778 putIReg(2, R_EDX
, mkexpr(resHi
));
2779 putIReg(2, R_EAX
, mkexpr(resLo
));
2783 IRTemp res16
= newTemp(Ity_I16
);
2784 IRTemp resHi
= newTemp(Ity_I8
);
2785 IRTemp resLo
= newTemp(Ity_I8
);
2786 IROp mulOp
= syned
? Iop_MullS8
: Iop_MullU8
;
2787 UInt tBaseOp
= syned
? X86G_CC_OP_SMULB
: X86G_CC_OP_UMULB
;
2788 setFlags_MUL ( Ity_I8
, t1
, tmp
, tBaseOp
);
2789 assign( res16
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
2790 assign( resHi
, unop(Iop_16HIto8
,mkexpr(res16
)));
2791 assign( resLo
, unop(Iop_16to8
,mkexpr(res16
)));
2792 putIReg(2, R_EAX
, mkexpr(res16
));
2796 vpanic("codegen_mulL_A_D(x86)");
2798 DIP("%s%c %s\n", syned
? "imul" : "mul", nameISize(sz
), tmp_txt
);
2802 /* Group 3 extended opcodes. */
2804 UInt
dis_Grp3 ( UChar sorb
, Bool locked
, Int sz
, Int delta
, Bool
* decode_OK
)
2811 IRType ty
= szToITy(sz
);
2812 IRTemp t1
= newTemp(ty
);
2813 IRTemp dst1
, src
, dst0
;
2815 *decode_OK
= True
; /* may change this later */
2817 modrm
= getIByte(delta
);
2819 if (locked
&& (gregOfRM(modrm
) != 2 && gregOfRM(modrm
) != 3)) {
2820 /* LOCK prefix only allowed with not and neg subopcodes */
2825 if (epartIsReg(modrm
)) {
2826 switch (gregOfRM(modrm
)) {
2827 case 0: { /* TEST */
2828 delta
++; d32
= getUDisp(sz
, delta
); delta
+= sz
;
2830 assign(dst1
, binop(mkSizedOp(ty
,Iop_And8
),
2831 getIReg(sz
,eregOfRM(modrm
)),
2833 setFlags_DEP1( Iop_And8
, dst1
, ty
);
2834 DIP("test%c $0x%x, %s\n", nameISize(sz
), d32
,
2835 nameIReg(sz
, eregOfRM(modrm
)));
2838 case 1: /* UNDEFINED */
2839 /* The Intel docs imply this insn is undefined and binutils
2840 agrees. Unfortunately Core 2 will run it (with who
2841 knows what result?) sandpile.org reckons it's an alias
2842 for case 0. We play safe. */
2847 putIReg(sz
, eregOfRM(modrm
),
2848 unop(mkSizedOp(ty
,Iop_Not8
),
2849 getIReg(sz
, eregOfRM(modrm
))));
2850 DIP("not%c %s\n", nameISize(sz
), nameIReg(sz
, eregOfRM(modrm
)));
2857 assign(dst0
, mkU(ty
,0));
2858 assign(src
, getIReg(sz
,eregOfRM(modrm
)));
2859 assign(dst1
, binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(dst0
), mkexpr(src
)));
2860 setFlags_DEP1_DEP2(Iop_Sub8
, dst0
, src
, ty
);
2861 putIReg(sz
, eregOfRM(modrm
), mkexpr(dst1
));
2862 DIP("neg%c %s\n", nameISize(sz
), nameIReg(sz
, eregOfRM(modrm
)));
2864 case 4: /* MUL (unsigned widening) */
2867 assign(src
, getIReg(sz
,eregOfRM(modrm
)));
2868 codegen_mulL_A_D ( sz
, False
, src
, nameIReg(sz
,eregOfRM(modrm
)) );
2870 case 5: /* IMUL (signed widening) */
2873 assign(src
, getIReg(sz
,eregOfRM(modrm
)));
2874 codegen_mulL_A_D ( sz
, True
, src
, nameIReg(sz
,eregOfRM(modrm
)) );
2878 assign( t1
, getIReg(sz
, eregOfRM(modrm
)) );
2879 codegen_div ( sz
, t1
, False
);
2880 DIP("div%c %s\n", nameISize(sz
), nameIReg(sz
, eregOfRM(modrm
)));
2884 assign( t1
, getIReg(sz
, eregOfRM(modrm
)) );
2885 codegen_div ( sz
, t1
, True
);
2886 DIP("idiv%c %s\n", nameISize(sz
), nameIReg(sz
, eregOfRM(modrm
)));
2889 /* This can't happen - gregOfRM should return 0 .. 7 only */
2890 vpanic("Grp3(x86)");
2893 addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
2896 assign(t1
, loadLE(ty
,mkexpr(addr
)));
2897 switch (gregOfRM(modrm
)) {
2898 case 0: { /* TEST */
2899 d32
= getUDisp(sz
, delta
); delta
+= sz
;
2901 assign(dst1
, binop(mkSizedOp(ty
,Iop_And8
),
2902 mkexpr(t1
), mkU(ty
,d32
)));
2903 setFlags_DEP1( Iop_And8
, dst1
, ty
);
2904 DIP("test%c $0x%x, %s\n", nameISize(sz
), d32
, dis_buf
);
2907 case 1: /* UNDEFINED */
2908 /* See comment above on R case */
2913 assign(dst1
, unop(mkSizedOp(ty
,Iop_Not8
), mkexpr(t1
)));
2915 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(dst1
)/*new*/,
2916 guest_EIP_curr_instr
);
2918 storeLE( mkexpr(addr
), mkexpr(dst1
) );
2920 DIP("not%c %s\n", nameISize(sz
), dis_buf
);
2926 assign(dst0
, mkU(ty
,0));
2927 assign(src
, mkexpr(t1
));
2928 assign(dst1
, binop(mkSizedOp(ty
,Iop_Sub8
),
2929 mkexpr(dst0
), mkexpr(src
)));
2931 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(dst1
)/*new*/,
2932 guest_EIP_curr_instr
);
2934 storeLE( mkexpr(addr
), mkexpr(dst1
) );
2936 setFlags_DEP1_DEP2(Iop_Sub8
, dst0
, src
, ty
);
2937 DIP("neg%c %s\n", nameISize(sz
), dis_buf
);
2940 codegen_mulL_A_D ( sz
, False
, t1
, dis_buf
);
2943 codegen_mulL_A_D ( sz
, True
, t1
, dis_buf
);
2946 codegen_div ( sz
, t1
, False
);
2947 DIP("div%c %s\n", nameISize(sz
), dis_buf
);
2950 codegen_div ( sz
, t1
, True
);
2951 DIP("idiv%c %s\n", nameISize(sz
), dis_buf
);
2954 /* This can't happen - gregOfRM should return 0 .. 7 only */
2955 vpanic("Grp3(x86)");
2962 /* Group 4 extended opcodes. */
2964 UInt
dis_Grp4 ( UChar sorb
, Bool locked
, Int delta
, Bool
* decode_OK
)
2970 IRTemp t1
= newTemp(ty
);
2971 IRTemp t2
= newTemp(ty
);
2975 modrm
= getIByte(delta
);
2977 if (locked
&& (gregOfRM(modrm
) != 0 && gregOfRM(modrm
) != 1)) {
2978 /* LOCK prefix only allowed with inc and dec subopcodes */
2983 if (epartIsReg(modrm
)) {
2984 assign(t1
, getIReg(1, eregOfRM(modrm
)));
2985 switch (gregOfRM(modrm
)) {
2987 assign(t2
, binop(Iop_Add8
, mkexpr(t1
), mkU8(1)));
2988 putIReg(1, eregOfRM(modrm
), mkexpr(t2
));
2989 setFlags_INC_DEC( True
, t2
, ty
);
2992 assign(t2
, binop(Iop_Sub8
, mkexpr(t1
), mkU8(1)));
2993 putIReg(1, eregOfRM(modrm
), mkexpr(t2
));
2994 setFlags_INC_DEC( False
, t2
, ty
);
3001 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm
)),
3002 nameIReg(1, eregOfRM(modrm
)));
3004 IRTemp addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
3005 assign( t1
, loadLE(ty
, mkexpr(addr
)) );
3006 switch (gregOfRM(modrm
)) {
3008 assign(t2
, binop(Iop_Add8
, mkexpr(t1
), mkU8(1)));
3010 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(t2
)/*new*/,
3011 guest_EIP_curr_instr
);
3013 storeLE( mkexpr(addr
), mkexpr(t2
) );
3015 setFlags_INC_DEC( True
, t2
, ty
);
3018 assign(t2
, binop(Iop_Sub8
, mkexpr(t1
), mkU8(1)));
3020 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(t2
)/*new*/,
3021 guest_EIP_curr_instr
);
3023 storeLE( mkexpr(addr
), mkexpr(t2
) );
3025 setFlags_INC_DEC( False
, t2
, ty
);
3032 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm
)), dis_buf
);
3038 /* Group 5 extended opcodes. */
3040 UInt
dis_Grp5 ( UChar sorb
, Bool locked
, Int sz
, Int delta
,
3041 /*MOD*/DisResult
* dres
, /*OUT*/Bool
* decode_OK
)
3046 IRTemp addr
= IRTemp_INVALID
;
3047 IRType ty
= szToITy(sz
);
3048 IRTemp t1
= newTemp(ty
);
3049 IRTemp t2
= IRTemp_INVALID
;
3053 modrm
= getIByte(delta
);
3055 if (locked
&& (gregOfRM(modrm
) != 0 && gregOfRM(modrm
) != 1)) {
3056 /* LOCK prefix only allowed with inc and dec subopcodes */
3061 if (epartIsReg(modrm
)) {
3062 assign(t1
, getIReg(sz
,eregOfRM(modrm
)));
3063 switch (gregOfRM(modrm
)) {
3065 vassert(sz
== 2 || sz
== 4);
3067 assign(t2
, binop(mkSizedOp(ty
,Iop_Add8
),
3068 mkexpr(t1
), mkU(ty
,1)));
3069 setFlags_INC_DEC( True
, t2
, ty
);
3070 putIReg(sz
,eregOfRM(modrm
),mkexpr(t2
));
3073 vassert(sz
== 2 || sz
== 4);
3075 assign(t2
, binop(mkSizedOp(ty
,Iop_Sub8
),
3076 mkexpr(t1
), mkU(ty
,1)));
3077 setFlags_INC_DEC( False
, t2
, ty
);
3078 putIReg(sz
,eregOfRM(modrm
),mkexpr(t2
));
3080 case 2: /* call Ev */
3082 t2
= newTemp(Ity_I32
);
3083 assign(t2
, binop(Iop_Sub32
, getIReg(4,R_ESP
), mkU32(4)));
3084 putIReg(4, R_ESP
, mkexpr(t2
));
3085 storeLE( mkexpr(t2
), mkU32(guest_EIP_bbstart
+delta
+1));
3086 jmp_treg(dres
, Ijk_Call
, t1
);
3087 vassert(dres
->whatNext
== Dis_StopHere
);
3089 case 4: /* jmp Ev */
3091 jmp_treg(dres
, Ijk_Boring
, t1
);
3092 vassert(dres
->whatNext
== Dis_StopHere
);
3094 case 6: /* PUSH Ev */
3095 vassert(sz
== 4 || sz
== 2);
3096 t2
= newTemp(Ity_I32
);
3097 assign( t2
, binop(Iop_Sub32
,getIReg(4,R_ESP
),mkU32(sz
)) );
3098 putIReg(4, R_ESP
, mkexpr(t2
) );
3099 storeLE( mkexpr(t2
), mkexpr(t1
) );
3106 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm
)),
3107 nameISize(sz
), nameIReg(sz
, eregOfRM(modrm
)));
3109 addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
3110 assign(t1
, loadLE(ty
,mkexpr(addr
)));
3111 switch (gregOfRM(modrm
)) {
3114 assign(t2
, binop(mkSizedOp(ty
,Iop_Add8
),
3115 mkexpr(t1
), mkU(ty
,1)));
3117 casLE( mkexpr(addr
),
3118 mkexpr(t1
), mkexpr(t2
), guest_EIP_curr_instr
);
3120 storeLE(mkexpr(addr
),mkexpr(t2
));
3122 setFlags_INC_DEC( True
, t2
, ty
);
3126 assign(t2
, binop(mkSizedOp(ty
,Iop_Sub8
),
3127 mkexpr(t1
), mkU(ty
,1)));
3129 casLE( mkexpr(addr
),
3130 mkexpr(t1
), mkexpr(t2
), guest_EIP_curr_instr
);
3132 storeLE(mkexpr(addr
),mkexpr(t2
));
3134 setFlags_INC_DEC( False
, t2
, ty
);
3136 case 2: /* call Ev */
3138 t2
= newTemp(Ity_I32
);
3139 assign(t2
, binop(Iop_Sub32
, getIReg(4,R_ESP
), mkU32(4)));
3140 putIReg(4, R_ESP
, mkexpr(t2
));
3141 storeLE( mkexpr(t2
), mkU32(guest_EIP_bbstart
+delta
+len
));
3142 jmp_treg(dres
, Ijk_Call
, t1
);
3143 vassert(dres
->whatNext
== Dis_StopHere
);
3145 case 4: /* JMP Ev */
3147 jmp_treg(dres
, Ijk_Boring
, t1
);
3148 vassert(dres
->whatNext
== Dis_StopHere
);
3150 case 6: /* PUSH Ev */
3151 vassert(sz
== 4 || sz
== 2);
3152 t2
= newTemp(Ity_I32
);
3153 assign( t2
, binop(Iop_Sub32
,getIReg(4,R_ESP
),mkU32(sz
)) );
3154 putIReg(4, R_ESP
, mkexpr(t2
) );
3155 storeLE( mkexpr(t2
), mkexpr(t1
) );
3162 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm
)),
3163 nameISize(sz
), dis_buf
);
3169 /*------------------------------------------------------------*/
3170 /*--- Disassembling string ops (including REP prefixes) ---*/
3171 /*------------------------------------------------------------*/
3173 /* Code shared by all the string ops */
3175 void dis_string_op_increment(Int sz
, IRTemp t_inc
)
3177 if (sz
== 4 || sz
== 2) {
3179 binop(Iop_Shl32
, IRExpr_Get( OFFB_DFLAG
, Ity_I32
),
3183 IRExpr_Get( OFFB_DFLAG
, Ity_I32
) );
3188 void dis_string_op( void (*dis_OP
)( Int
, IRTemp
),
3189 Int sz
, const HChar
* name
, UChar sorb
)
3191 IRTemp t_inc
= newTemp(Ity_I32
);
3192 vassert(sorb
== 0); /* hmm. so what was the point of passing it in? */
3193 dis_string_op_increment(sz
, t_inc
);
3194 dis_OP( sz
, t_inc
);
3195 DIP("%s%c\n", name
, nameISize(sz
));
3199 void dis_MOVS ( Int sz
, IRTemp t_inc
)
3201 IRType ty
= szToITy(sz
);
3202 IRTemp td
= newTemp(Ity_I32
); /* EDI */
3203 IRTemp ts
= newTemp(Ity_I32
); /* ESI */
3205 assign( td
, getIReg(4, R_EDI
) );
3206 assign( ts
, getIReg(4, R_ESI
) );
3208 storeLE( mkexpr(td
), loadLE(ty
,mkexpr(ts
)) );
3210 putIReg( 4, R_EDI
, binop(Iop_Add32
, mkexpr(td
), mkexpr(t_inc
)) );
3211 putIReg( 4, R_ESI
, binop(Iop_Add32
, mkexpr(ts
), mkexpr(t_inc
)) );
3215 void dis_LODS ( Int sz
, IRTemp t_inc
)
3217 IRType ty
= szToITy(sz
);
3218 IRTemp ts
= newTemp(Ity_I32
); /* ESI */
3220 assign( ts
, getIReg(4, R_ESI
) );
3222 putIReg( sz
, R_EAX
, loadLE(ty
, mkexpr(ts
)) );
3224 putIReg( 4, R_ESI
, binop(Iop_Add32
, mkexpr(ts
), mkexpr(t_inc
)) );
3228 void dis_STOS ( Int sz
, IRTemp t_inc
)
3230 IRType ty
= szToITy(sz
);
3231 IRTemp ta
= newTemp(ty
); /* EAX */
3232 IRTemp td
= newTemp(Ity_I32
); /* EDI */
3234 assign( ta
, getIReg(sz
, R_EAX
) );
3235 assign( td
, getIReg(4, R_EDI
) );
3237 storeLE( mkexpr(td
), mkexpr(ta
) );
3239 putIReg( 4, R_EDI
, binop(Iop_Add32
, mkexpr(td
), mkexpr(t_inc
)) );
3243 void dis_CMPS ( Int sz
, IRTemp t_inc
)
3245 IRType ty
= szToITy(sz
);
3246 IRTemp tdv
= newTemp(ty
); /* (EDI) */
3247 IRTemp tsv
= newTemp(ty
); /* (ESI) */
3248 IRTemp td
= newTemp(Ity_I32
); /* EDI */
3249 IRTemp ts
= newTemp(Ity_I32
); /* ESI */
3251 assign( td
, getIReg(4, R_EDI
) );
3252 assign( ts
, getIReg(4, R_ESI
) );
3254 assign( tdv
, loadLE(ty
,mkexpr(td
)) );
3255 assign( tsv
, loadLE(ty
,mkexpr(ts
)) );
3257 setFlags_DEP1_DEP2 ( Iop_Sub8
, tsv
, tdv
, ty
);
3259 putIReg(4, R_EDI
, binop(Iop_Add32
, mkexpr(td
), mkexpr(t_inc
)) );
3260 putIReg(4, R_ESI
, binop(Iop_Add32
, mkexpr(ts
), mkexpr(t_inc
)) );
3264 void dis_SCAS ( Int sz
, IRTemp t_inc
)
3266 IRType ty
= szToITy(sz
);
3267 IRTemp ta
= newTemp(ty
); /* EAX */
3268 IRTemp td
= newTemp(Ity_I32
); /* EDI */
3269 IRTemp tdv
= newTemp(ty
); /* (EDI) */
3271 assign( ta
, getIReg(sz
, R_EAX
) );
3272 assign( td
, getIReg(4, R_EDI
) );
3274 assign( tdv
, loadLE(ty
,mkexpr(td
)) );
3275 setFlags_DEP1_DEP2 ( Iop_Sub8
, ta
, tdv
, ty
);
3277 putIReg(4, R_EDI
, binop(Iop_Add32
, mkexpr(td
), mkexpr(t_inc
)) );
3281 /* Wrap the appropriate string op inside a REP/REPE/REPNE.
3282 We assume the insn is the last one in the basic block, and so emit a jump
3283 to the next insn, rather than just falling through. */
3285 void dis_REP_op ( /*MOD*/DisResult
* dres
,
3287 void (*dis_OP
)(Int
, IRTemp
),
3288 Int sz
, Addr32 eip
, Addr32 eip_next
, const HChar
* name
)
3290 IRTemp t_inc
= newTemp(Ity_I32
);
3291 IRTemp tc
= newTemp(Ity_I32
); /* ECX */
3293 assign( tc
, getIReg(4,R_ECX
) );
3295 stmt( IRStmt_Exit( binop(Iop_CmpEQ32
,mkexpr(tc
),mkU32(0)),
3297 IRConst_U32(eip_next
), OFFB_EIP
) );
3299 putIReg(4, R_ECX
, binop(Iop_Sub32
, mkexpr(tc
), mkU32(1)) );
3301 dis_string_op_increment(sz
, t_inc
);
3304 if (cond
== X86CondAlways
) {
3305 jmp_lit(dres
, Ijk_Boring
, eip
);
3306 vassert(dres
->whatNext
== Dis_StopHere
);
3308 stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond
),
3310 IRConst_U32(eip
), OFFB_EIP
) );
3311 jmp_lit(dres
, Ijk_Boring
, eip_next
);
3312 vassert(dres
->whatNext
== Dis_StopHere
);
3314 DIP("%s%c\n", name
, nameISize(sz
));
3318 /*------------------------------------------------------------*/
3319 /*--- Arithmetic, etc. ---*/
3320 /*------------------------------------------------------------*/
3322 /* IMUL E, G. Supplied eip points to the modR/M byte. */
3324 UInt
dis_mul_E_G ( UChar sorb
,
3330 UChar rm
= getIByte(delta0
);
3331 IRType ty
= szToITy(size
);
3332 IRTemp te
= newTemp(ty
);
3333 IRTemp tg
= newTemp(ty
);
3334 IRTemp resLo
= newTemp(ty
);
3336 assign( tg
, getIReg(size
, gregOfRM(rm
)) );
3337 if (epartIsReg(rm
)) {
3338 assign( te
, getIReg(size
, eregOfRM(rm
)) );
3340 IRTemp addr
= disAMode( &alen
, sorb
, delta0
, dis_buf
);
3341 assign( te
, loadLE(ty
,mkexpr(addr
)) );
3344 setFlags_MUL ( ty
, te
, tg
, X86G_CC_OP_SMULB
);
3346 assign( resLo
, binop( mkSizedOp(ty
, Iop_Mul8
), mkexpr(te
), mkexpr(tg
) ) );
3348 putIReg(size
, gregOfRM(rm
), mkexpr(resLo
) );
3350 if (epartIsReg(rm
)) {
3351 DIP("imul%c %s, %s\n", nameISize(size
),
3352 nameIReg(size
,eregOfRM(rm
)),
3353 nameIReg(size
,gregOfRM(rm
)));
3356 DIP("imul%c %s, %s\n", nameISize(size
),
3357 dis_buf
, nameIReg(size
,gregOfRM(rm
)));
3363 /* IMUL I * E -> G. Supplied eip points to the modR/M byte. */
3365 UInt
dis_imul_I_E_G ( UChar sorb
,
3372 UChar rm
= getIByte(delta
);
3373 IRType ty
= szToITy(size
);
3374 IRTemp te
= newTemp(ty
);
3375 IRTemp tl
= newTemp(ty
);
3376 IRTemp resLo
= newTemp(ty
);
3378 vassert(size
== 1 || size
== 2 || size
== 4);
3380 if (epartIsReg(rm
)) {
3381 assign(te
, getIReg(size
, eregOfRM(rm
)));
3384 IRTemp addr
= disAMode( &alen
, sorb
, delta
, dis_buf
);
3385 assign(te
, loadLE(ty
, mkexpr(addr
)));
3388 d32
= getSDisp(litsize
,delta
);
3391 if (size
== 1) d32
&= 0xFF;
3392 if (size
== 2) d32
&= 0xFFFF;
3394 assign(tl
, mkU(ty
,d32
));
3396 assign( resLo
, binop( mkSizedOp(ty
, Iop_Mul8
), mkexpr(te
), mkexpr(tl
) ));
3398 setFlags_MUL ( ty
, te
, tl
, X86G_CC_OP_SMULB
);
3400 putIReg(size
, gregOfRM(rm
), mkexpr(resLo
));
3402 DIP("imul %d, %s, %s\n", d32
,
3403 ( epartIsReg(rm
) ? nameIReg(size
,eregOfRM(rm
)) : dis_buf
),
3404 nameIReg(size
,gregOfRM(rm
)) );
3409 /* Generate an IR sequence to do a count-leading-zeroes operation on
3410 the supplied IRTemp, and return a new IRTemp holding the result.
3411 'ty' may be Ity_I16 or Ity_I32 only. In the case where the
3412 argument is zero, return the number of bits in the word (the
3413 natural semantics). */
3414 static IRTemp
gen_LZCNT ( IRType ty
, IRTemp src
)
3416 vassert(ty
== Ity_I32
|| ty
== Ity_I16
);
3418 IRTemp src32
= newTemp(Ity_I32
);
3419 assign(src32
, widenUto32( mkexpr(src
) ));
3421 IRTemp src32x
= newTemp(Ity_I32
);
3423 binop(Iop_Shl32
, mkexpr(src32
),
3424 mkU8(32 - 8 * sizeofIRType(ty
))));
3426 // Clz32 has undefined semantics when its input is zero, so
3427 // special-case around that.
3428 IRTemp res32
= newTemp(Ity_I32
);
3431 binop(Iop_CmpEQ32
, mkexpr(src32x
), mkU32(0)),
3432 mkU32(8 * sizeofIRType(ty
)),
3433 unop(Iop_Clz32
, mkexpr(src32x
))
3436 IRTemp res
= newTemp(ty
);
3437 assign(res
, narrowTo(ty
, mkexpr(res32
)));
3442 /*------------------------------------------------------------*/
3444 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
3446 /*------------------------------------------------------------*/
3448 /* --- Helper functions for dealing with the register stack. --- */
3450 /* --- Set the emulation-warning pseudo-register. --- */
3452 static void put_emwarn ( IRExpr
* e
/* :: Ity_I32 */ )
3454 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I32
);
3455 stmt( IRStmt_Put( OFFB_EMNOTE
, e
) );
3458 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
3460 static IRExpr
* mkQNaN64 ( void )
3462 /* QNaN is 0 2047 1 0(51times)
3463 == 0b 11111111111b 1 0(51times)
3464 == 0x7FF8 0000 0000 0000
3466 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL
));
3469 /* --------- Get/put the top-of-stack pointer. --------- */
3471 static IRExpr
* get_ftop ( void )
3473 return IRExpr_Get( OFFB_FTOP
, Ity_I32
);
3476 static void put_ftop ( IRExpr
* e
)
3478 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I32
);
3479 stmt( IRStmt_Put( OFFB_FTOP
, e
) );
3482 /* --------- Get/put the C3210 bits. --------- */
3484 static IRExpr
* get_C3210 ( void )
3486 return IRExpr_Get( OFFB_FC3210
, Ity_I32
);
3489 static void put_C3210 ( IRExpr
* e
)
3491 stmt( IRStmt_Put( OFFB_FC3210
, e
) );
3494 /* --------- Get/put the FPU rounding mode. --------- */
3495 static IRExpr
* /* :: Ity_I32 */ get_fpround ( void )
3497 return IRExpr_Get( OFFB_FPROUND
, Ity_I32
);
3500 static void put_fpround ( IRExpr
* /* :: Ity_I32 */ e
)
3502 stmt( IRStmt_Put( OFFB_FPROUND
, e
) );
3506 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
3507 /* Produces a value in 0 .. 3, which is encoded as per the type
3508 IRRoundingMode. Since the guest_FPROUND value is also encoded as
3509 per IRRoundingMode, we merely need to get it and mask it for
3512 static IRExpr
* /* :: Ity_I32 */ get_roundingmode ( void )
3514 return binop( Iop_And32
, get_fpround(), mkU32(3) );
3517 static IRExpr
* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
3519 return mkU32(Irrm_NEAREST
);
3523 /* --------- Get/set FP register tag bytes. --------- */
3525 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
3527 static void put_ST_TAG ( Int i
, IRExpr
* value
)
3530 vassert(typeOfIRExpr(irsb
->tyenv
, value
) == Ity_I8
);
3531 descr
= mkIRRegArray( OFFB_FPTAGS
, Ity_I8
, 8 );
3532 stmt( IRStmt_PutI( mkIRPutI(descr
, get_ftop(), i
, value
) ) );
3535 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
3536 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
3538 static IRExpr
* get_ST_TAG ( Int i
)
3540 IRRegArray
* descr
= mkIRRegArray( OFFB_FPTAGS
, Ity_I8
, 8 );
3541 return IRExpr_GetI( descr
, get_ftop(), i
);
3545 /* --------- Get/set FP registers. --------- */
3547 /* Given i, and some expression e, emit 'ST(i) = e' and set the
3548 register's tag to indicate the register is full. The previous
3549 state of the register is not checked. */
3551 static void put_ST_UNCHECKED ( Int i
, IRExpr
* value
)
3554 vassert(typeOfIRExpr(irsb
->tyenv
, value
) == Ity_F64
);
3555 descr
= mkIRRegArray( OFFB_FPREGS
, Ity_F64
, 8 );
3556 stmt( IRStmt_PutI( mkIRPutI(descr
, get_ftop(), i
, value
) ) );
3557 /* Mark the register as in-use. */
3558 put_ST_TAG(i
, mkU8(1));
3561 /* Given i, and some expression e, emit
3562 ST(i) = is_full(i) ? NaN : e
3563 and set the tag accordingly.
3566 static void put_ST ( Int i
, IRExpr
* value
)
3570 IRExpr_ITE( binop(Iop_CmpNE8
, get_ST_TAG(i
), mkU8(0)),
3571 /* non-0 means full */
3580 /* Given i, generate an expression yielding 'ST(i)'. */
3582 static IRExpr
* get_ST_UNCHECKED ( Int i
)
3584 IRRegArray
* descr
= mkIRRegArray( OFFB_FPREGS
, Ity_F64
, 8 );
3585 return IRExpr_GetI( descr
, get_ftop(), i
);
3589 /* Given i, generate an expression yielding
3590 is_full(i) ? ST(i) : NaN
3593 static IRExpr
* get_ST ( Int i
)
3596 IRExpr_ITE( binop(Iop_CmpNE8
, get_ST_TAG(i
), mkU8(0)),
3597 /* non-0 means full */
3598 get_ST_UNCHECKED(i
),
3604 /* Given i, and some expression e, and a condition cond, generate IR
3605 which has the same effect as put_ST(i,e) when cond is true and has
3606 no effect when cond is false. Given the lack of proper
3607 if-then-else in the IR, this is pretty tricky.
3610 static void maybe_put_ST ( IRTemp cond
, Int i
, IRExpr
* value
)
3612 // new_tag = if cond then FULL else old_tag
3613 // new_val = if cond then (if old_tag==FULL then NaN else val)
3616 IRTemp old_tag
= newTemp(Ity_I8
);
3617 assign(old_tag
, get_ST_TAG(i
));
3618 IRTemp new_tag
= newTemp(Ity_I8
);
3620 IRExpr_ITE(mkexpr(cond
), mkU8(1)/*FULL*/, mkexpr(old_tag
)));
3622 IRTemp old_val
= newTemp(Ity_F64
);
3623 assign(old_val
, get_ST_UNCHECKED(i
));
3624 IRTemp new_val
= newTemp(Ity_F64
);
3626 IRExpr_ITE(mkexpr(cond
),
3627 IRExpr_ITE(binop(Iop_CmpNE8
, mkexpr(old_tag
), mkU8(0)),
3628 /* non-0 means full */
3634 put_ST_UNCHECKED(i
, mkexpr(new_val
));
3635 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
3636 // now set it to new_tag instead.
3637 put_ST_TAG(i
, mkexpr(new_tag
));
3640 /* Adjust FTOP downwards by one register. */
3642 static void fp_push ( void )
3644 put_ftop( binop(Iop_Sub32
, get_ftop(), mkU32(1)) );
3647 /* Adjust FTOP downwards by one register when COND is 1:I1. Else
3650 static void maybe_fp_push ( IRTemp cond
)
3652 put_ftop( binop(Iop_Sub32
, get_ftop(), unop(Iop_1Uto32
,mkexpr(cond
))) );
3655 /* Adjust FTOP upwards by one register, and mark the vacated register
3658 static void fp_pop ( void )
3660 put_ST_TAG(0, mkU8(0));
3661 put_ftop( binop(Iop_Add32
, get_ftop(), mkU32(1)) );
3664 /* Set the C2 bit of the FPU status register to e[0]. Assumes that
3667 static void set_C2 ( IRExpr
* e
)
3669 IRExpr
* cleared
= binop(Iop_And32
, get_C3210(), mkU32(~X86G_FC_MASK_C2
));
3670 put_C3210( binop(Iop_Or32
,
3672 binop(Iop_Shl32
, e
, mkU8(X86G_FC_SHIFT_C2
))) );
3675 /* Generate code to check that abs(d64) < 2^63 and is finite. This is
3676 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
3677 test is simple, but the derivation of it is not so simple.
3679 The exponent field for an IEEE754 double is 11 bits. That means it
3680 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
3681 the number is either a NaN or an Infinity and so is not finite.
3682 Furthermore, a finite value of exactly 2^63 is the smallest value
3683 that has exponent value 0x43E. Hence, what we need to do is
3684 extract the exponent, ignoring the sign bit and mantissa, and check
3685 it is < 0x43E, or <= 0x43D.
3687 To make this easily applicable to 32- and 64-bit targets, a
3688 roundabout approach is used. First the number is converted to I64,
3689 then the top 32 bits are taken. Shifting them right by 20 bits
3690 places the sign bit and exponent in the bottom 12 bits. Anding
3691 with 0x7FF gets rid of the sign bit, leaving just the exponent
3692 available for comparison.
3694 static IRTemp
math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64
)
3696 IRTemp i64
= newTemp(Ity_I64
);
3697 assign(i64
, unop(Iop_ReinterpF64asI64
, mkexpr(d64
)) );
3698 IRTemp exponent
= newTemp(Ity_I32
);
3701 binop(Iop_Shr32
, unop(Iop_64HIto32
, mkexpr(i64
)), mkU8(20)),
3703 IRTemp in_range_and_finite
= newTemp(Ity_I1
);
3704 assign(in_range_and_finite
,
3705 binop(Iop_CmpLE32U
, mkexpr(exponent
), mkU32(0x43D)));
3706 return in_range_and_finite
;
3709 /* Invent a plausible-looking FPU status word value:
3710 ((ftop & 7) << 11) | (c3210 & 0x4700)
3712 static IRExpr
* get_FPU_sw ( void )
3718 binop(Iop_And32
, get_ftop(), mkU32(7)),
3720 binop(Iop_And32
, get_C3210(), mkU32(0x4700))
3725 /* ------------------------------------------------------- */
3726 /* Given all that stack-mangling junk, we can now go ahead
3727 and describe FP instructions.
3730 /* ST(0) = ST(0) `op` mem64/32(addr)
3731 Need to check ST(0)'s tag on read, but not on write.
3734 void fp_do_op_mem_ST_0 ( IRTemp addr
, const HChar
* op_txt
, HChar
* dis_buf
,
3737 DIP("f%s%c %s\n", op_txt
, dbl
?'l':'s', dis_buf
);
3741 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3743 loadLE(Ity_F64
,mkexpr(addr
))
3748 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3750 unop(Iop_F32toF64
, loadLE(Ity_F32
,mkexpr(addr
)))
3756 /* ST(0) = mem64/32(addr) `op` ST(0)
3757 Need to check ST(0)'s tag on read, but not on write.
3760 void fp_do_oprev_mem_ST_0 ( IRTemp addr
, const HChar
* op_txt
, HChar
* dis_buf
,
3763 DIP("f%s%c %s\n", op_txt
, dbl
?'l':'s', dis_buf
);
3767 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3768 loadLE(Ity_F64
,mkexpr(addr
)),
3774 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3775 unop(Iop_F32toF64
, loadLE(Ity_F32
,mkexpr(addr
))),
3782 /* ST(dst) = ST(dst) `op` ST(src).
3783 Check dst and src tags when reading but not on write.
3786 void fp_do_op_ST_ST ( const HChar
* op_txt
, IROp op
, UInt st_src
, UInt st_dst
,
3789 DIP("f%s%s st(%u), st(%u)\n", op_txt
, pop_after
?"p":"",
3794 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3802 /* ST(dst) = ST(src) `op` ST(dst).
3803 Check dst and src tags when reading but not on write.
3806 void fp_do_oprev_ST_ST ( const HChar
* op_txt
, IROp op
, UInt st_src
,
3807 UInt st_dst
, Bool pop_after
)
3809 DIP("f%s%s st(%u), st(%u)\n", op_txt
, pop_after
?"p":"",
3814 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3822 /* %eflags(Z,P,C) = UCOMI( st(0), st(i) ) */
3823 static void fp_do_ucomi_ST0_STi ( UInt i
, Bool pop_after
)
3825 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after
? "p" : "", i
);
3826 /* This is a bit of a hack (and isn't really right). It sets
3827 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
3828 documentation implies A and S are unchanged.
3830 /* It's also fishy in that it is used both for COMIP and
3831 UCOMIP, and they aren't the same (although similar). */
3832 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
3833 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
3834 stmt( IRStmt_Put( OFFB_CC_DEP1
,
3836 binop(Iop_CmpF64
, get_ST(0), get_ST(i
)),
3839 /* Set NDEP even though it isn't used. This makes redundant-PUT
3840 elimination of previous stores to this field work better. */
3841 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
3848 UInt
dis_FPU ( Bool
* decode_ok
, UChar sorb
, Int delta
)
3855 /* On entry, delta points at the second byte of the insn (the modrm
3857 UChar first_opcode
= getIByte(delta
-1);
3858 UChar modrm
= getIByte(delta
+0);
3860 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
3862 if (first_opcode
== 0xD8) {
3865 /* bits 5,4,3 are an opcode extension, and the modRM also
3866 specifies an address. */
3867 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
3870 switch (gregOfRM(modrm
)) {
3872 case 0: /* FADD single-real */
3873 fp_do_op_mem_ST_0 ( addr
, "add", dis_buf
, Iop_AddF64
, False
);
3876 case 1: /* FMUL single-real */
3877 fp_do_op_mem_ST_0 ( addr
, "mul", dis_buf
, Iop_MulF64
, False
);
3880 case 2: /* FCOM single-real */
3881 DIP("fcoms %s\n", dis_buf
);
3882 /* This forces C1 to zero, which isn't right. */
3889 loadLE(Ity_F32
,mkexpr(addr
)))),
3895 case 3: /* FCOMP single-real */
3896 DIP("fcomps %s\n", dis_buf
);
3897 /* This forces C1 to zero, which isn't right. */
3904 loadLE(Ity_F32
,mkexpr(addr
)))),
3911 case 4: /* FSUB single-real */
3912 fp_do_op_mem_ST_0 ( addr
, "sub", dis_buf
, Iop_SubF64
, False
);
3915 case 5: /* FSUBR single-real */
3916 fp_do_oprev_mem_ST_0 ( addr
, "subr", dis_buf
, Iop_SubF64
, False
);
3919 case 6: /* FDIV single-real */
3920 fp_do_op_mem_ST_0 ( addr
, "div", dis_buf
, Iop_DivF64
, False
);
3923 case 7: /* FDIVR single-real */
3924 fp_do_oprev_mem_ST_0 ( addr
, "divr", dis_buf
, Iop_DivF64
, False
);
3928 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
3929 vex_printf("first_opcode == 0xD8\n");
3936 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
3937 fp_do_op_ST_ST ( "add", Iop_AddF64
, modrm
- 0xC0, 0, False
);
3940 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
3941 fp_do_op_ST_ST ( "mul", Iop_MulF64
, modrm
- 0xC8, 0, False
);
3944 /* Dunno if this is right */
3945 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
3946 r_dst
= (UInt
)modrm
- 0xD0;
3947 DIP("fcom %%st(0),%%st(%u)\n", r_dst
);
3948 /* This forces C1 to zero, which isn't right. */
3952 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
3958 /* Dunno if this is right */
3959 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
3960 r_dst
= (UInt
)modrm
- 0xD8;
3961 DIP("fcomp %%st(0),%%st(%u)\n", r_dst
);
3962 /* This forces C1 to zero, which isn't right. */
3966 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
3973 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
3974 fp_do_op_ST_ST ( "sub", Iop_SubF64
, modrm
- 0xE0, 0, False
);
3977 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
3978 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, modrm
- 0xE8, 0, False
);
3981 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
3982 fp_do_op_ST_ST ( "div", Iop_DivF64
, modrm
- 0xF0, 0, False
);
3985 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
3986 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, modrm
- 0xF8, 0, False
);
3995 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
3997 if (first_opcode
== 0xD9) {
4000 /* bits 5,4,3 are an opcode extension, and the modRM also
4001 specifies an address. */
4002 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
4005 switch (gregOfRM(modrm
)) {
4007 case 0: /* FLD single-real */
4008 DIP("flds %s\n", dis_buf
);
4010 put_ST(0, unop(Iop_F32toF64
,
4011 loadLE(Ity_F32
, mkexpr(addr
))));
4014 case 2: /* FST single-real */
4015 DIP("fsts %s\n", dis_buf
);
4016 storeLE(mkexpr(addr
),
4017 binop(Iop_F64toF32
, get_roundingmode(), get_ST(0)));
4020 case 3: /* FSTP single-real */
4021 DIP("fstps %s\n", dis_buf
);
4022 storeLE(mkexpr(addr
),
4023 binop(Iop_F64toF32
, get_roundingmode(), get_ST(0)));
4027 case 4: { /* FLDENV m28 */
4028 /* Uses dirty helper:
4029 VexEmNote x86g_do_FLDENV ( VexGuestX86State*, HWord ) */
4030 IRTemp ew
= newTemp(Ity_I32
);
4031 IRDirty
* d
= unsafeIRDirty_0_N (
4033 "x86g_dirtyhelper_FLDENV",
4034 &x86g_dirtyhelper_FLDENV
,
4035 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
4038 /* declare we're reading memory */
4040 d
->mAddr
= mkexpr(addr
);
4043 /* declare we're writing guest state */
4045 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
4047 d
->fxState
[0].fx
= Ifx_Write
;
4048 d
->fxState
[0].offset
= OFFB_FTOP
;
4049 d
->fxState
[0].size
= sizeof(UInt
);
4051 d
->fxState
[1].fx
= Ifx_Write
;
4052 d
->fxState
[1].offset
= OFFB_FPTAGS
;
4053 d
->fxState
[1].size
= 8 * sizeof(UChar
);
4055 d
->fxState
[2].fx
= Ifx_Write
;
4056 d
->fxState
[2].offset
= OFFB_FPROUND
;
4057 d
->fxState
[2].size
= sizeof(UInt
);
4059 d
->fxState
[3].fx
= Ifx_Write
;
4060 d
->fxState
[3].offset
= OFFB_FC3210
;
4061 d
->fxState
[3].size
= sizeof(UInt
);
4063 stmt( IRStmt_Dirty(d
) );
4065 /* ew contains any emulation warning we may need to
4066 issue. If needed, side-exit to the next insn,
4067 reporting the warning, so that Valgrind's dispatcher
4068 sees the warning. */
4069 put_emwarn( mkexpr(ew
) );
4072 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
4074 IRConst_U32( ((Addr32
)guest_EIP_bbstart
)+delta
),
4079 DIP("fldenv %s\n", dis_buf
);
4083 case 5: {/* FLDCW */
4084 /* The only thing we observe in the control word is the
4085 rounding mode. Therefore, pass the 16-bit value
4086 (x87 native-format control word) to a clean helper,
4087 getting back a 64-bit value, the lower half of which
4088 is the FPROUND value to store, and the upper half of
4089 which is the emulation-warning token which may be
4092 /* ULong x86h_check_fldcw ( UInt ); */
4093 IRTemp t64
= newTemp(Ity_I64
);
4094 IRTemp ew
= newTemp(Ity_I32
);
4095 DIP("fldcw %s\n", dis_buf
);
4096 assign( t64
, mkIRExprCCall(
4097 Ity_I64
, 0/*regparms*/,
4102 loadLE(Ity_I16
, mkexpr(addr
)))
4107 put_fpround( unop(Iop_64to32
, mkexpr(t64
)) );
4108 assign( ew
, unop(Iop_64HIto32
, mkexpr(t64
) ) );
4109 put_emwarn( mkexpr(ew
) );
4110 /* Finally, if an emulation warning was reported,
4111 side-exit to the next insn, reporting the warning,
4112 so that Valgrind's dispatcher sees the warning. */
4115 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
4117 IRConst_U32( ((Addr32
)guest_EIP_bbstart
)+delta
),
4124 case 6: { /* FNSTENV m28 */
4125 /* Uses dirty helper:
4126 void x86g_do_FSTENV ( VexGuestX86State*, HWord ) */
4127 IRDirty
* d
= unsafeIRDirty_0_N (
4129 "x86g_dirtyhelper_FSTENV",
4130 &x86g_dirtyhelper_FSTENV
,
4131 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
4133 /* declare we're writing memory */
4135 d
->mAddr
= mkexpr(addr
);
4138 /* declare we're reading guest state */
4140 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
4142 d
->fxState
[0].fx
= Ifx_Read
;
4143 d
->fxState
[0].offset
= OFFB_FTOP
;
4144 d
->fxState
[0].size
= sizeof(UInt
);
4146 d
->fxState
[1].fx
= Ifx_Read
;
4147 d
->fxState
[1].offset
= OFFB_FPTAGS
;
4148 d
->fxState
[1].size
= 8 * sizeof(UChar
);
4150 d
->fxState
[2].fx
= Ifx_Read
;
4151 d
->fxState
[2].offset
= OFFB_FPROUND
;
4152 d
->fxState
[2].size
= sizeof(UInt
);
4154 d
->fxState
[3].fx
= Ifx_Read
;
4155 d
->fxState
[3].offset
= OFFB_FC3210
;
4156 d
->fxState
[3].size
= sizeof(UInt
);
4158 stmt( IRStmt_Dirty(d
) );
4160 DIP("fnstenv %s\n", dis_buf
);
4164 case 7: /* FNSTCW */
4165 /* Fake up a native x87 FPU control word. The only
4166 thing it depends on is FPROUND[1:0], so call a clean
4167 helper to cook it up. */
4168 /* UInt x86h_create_fpucw ( UInt fpround ) */
4169 DIP("fnstcw %s\n", dis_buf
);
4175 "x86g_create_fpucw", &x86g_create_fpucw
,
4176 mkIRExprVec_1( get_fpround() )
4183 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
4184 vex_printf("first_opcode == 0xD9\n");
4192 case 0xC0 ... 0xC7: /* FLD %st(?) */
4193 r_src
= (UInt
)modrm
- 0xC0;
4194 DIP("fld %%st(%u)\n", r_src
);
4195 t1
= newTemp(Ity_F64
);
4196 assign(t1
, get_ST(r_src
));
4198 put_ST(0, mkexpr(t1
));
4201 case 0xC8 ... 0xCF: /* FXCH %st(?) */
4202 r_src
= (UInt
)modrm
- 0xC8;
4203 DIP("fxch %%st(%u)\n", r_src
);
4204 t1
= newTemp(Ity_F64
);
4205 t2
= newTemp(Ity_F64
);
4206 assign(t1
, get_ST(0));
4207 assign(t2
, get_ST(r_src
));
4208 put_ST_UNCHECKED(0, mkexpr(t2
));
4209 put_ST_UNCHECKED(r_src
, mkexpr(t1
));
4212 case 0xE0: /* FCHS */
4214 put_ST_UNCHECKED(0, unop(Iop_NegF64
, get_ST(0)));
4217 case 0xE1: /* FABS */
4219 put_ST_UNCHECKED(0, unop(Iop_AbsF64
, get_ST(0)));
4222 case 0xE4: /* FTST */
4224 /* This forces C1 to zero, which isn't right. */
4225 /* Well, in fact the Intel docs say (bizarrely): "C1 is
4226 set to 0 if stack underflow occurred; otherwise, set
4227 to 0" which is pretty nonsensical. I guess it's a
4234 IRExpr_Const(IRConst_F64i(0x0ULL
))),
4240 case 0xE5: { /* FXAM */
4241 /* This is an interesting one. It examines %st(0),
4242 regardless of whether the tag says it's empty or not.
4243 Here, just pass both the tag (in our format) and the
4244 value (as a double, actually a ULong) to a helper
4247 = mkIRExprVec_2( unop(Iop_8Uto32
, get_ST_TAG(0)),
4248 unop(Iop_ReinterpF64asI64
,
4249 get_ST_UNCHECKED(0)) );
4250 put_C3210(mkIRExprCCall(
4253 "x86g_calculate_FXAM", &x86g_calculate_FXAM
,
4260 case 0xE8: /* FLD1 */
4263 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
4264 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL
)));
4267 case 0xE9: /* FLDL2T */
4270 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
4271 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL
)));
4274 case 0xEA: /* FLDL2E */
4277 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
4278 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL
)));
4281 case 0xEB: /* FLDPI */
4284 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
4285 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL
)));
4288 case 0xEC: /* FLDLG2 */
4291 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
4292 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL
)));
4295 case 0xED: /* FLDLN2 */
4298 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
4299 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL
)));
4302 case 0xEE: /* FLDZ */
4305 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
4306 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL
)));
4309 case 0xF0: /* F2XM1 */
4313 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4317 case 0xF1: /* FYL2X */
4321 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4327 case 0xF2: { /* FPTAN */
4329 IRTemp argD
= newTemp(Ity_F64
);
4330 assign(argD
, get_ST(0));
4331 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
4332 IRTemp resD
= newTemp(Ity_F64
);
4337 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4341 put_ST_UNCHECKED(0, mkexpr(resD
));
4342 /* Conditionally push 1.0 on the stack, if the arg is
4344 maybe_fp_push(argOK
);
4345 maybe_put_ST(argOK
, 0,
4346 IRExpr_Const(IRConst_F64(1.0)));
4347 set_C2( binop(Iop_Xor32
,
4348 unop(Iop_1Uto32
, mkexpr(argOK
)),
4353 case 0xF3: /* FPATAN */
4357 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4363 case 0xF4: { /* FXTRACT */
4364 IRTemp argF
= newTemp(Ity_F64
);
4365 IRTemp sigF
= newTemp(Ity_F64
);
4366 IRTemp expF
= newTemp(Ity_F64
);
4367 IRTemp argI
= newTemp(Ity_I64
);
4368 IRTemp sigI
= newTemp(Ity_I64
);
4369 IRTemp expI
= newTemp(Ity_I64
);
4371 assign( argF
, get_ST(0) );
4372 assign( argI
, unop(Iop_ReinterpF64asI64
, mkexpr(argF
)));
4375 Ity_I64
, 0/*regparms*/,
4376 "x86amd64g_calculate_FXTRACT",
4377 &x86amd64g_calculate_FXTRACT
,
4378 mkIRExprVec_2( mkexpr(argI
),
4379 mkIRExpr_HWord(0)/*sig*/ ))
4383 Ity_I64
, 0/*regparms*/,
4384 "x86amd64g_calculate_FXTRACT",
4385 &x86amd64g_calculate_FXTRACT
,
4386 mkIRExprVec_2( mkexpr(argI
),
4387 mkIRExpr_HWord(1)/*exp*/ ))
4389 assign( sigF
, unop(Iop_ReinterpI64asF64
, mkexpr(sigI
)) );
4390 assign( expF
, unop(Iop_ReinterpI64asF64
, mkexpr(expI
)) );
4392 put_ST_UNCHECKED(0, mkexpr(expF
) );
4395 put_ST(0, mkexpr(sigF
) );
4399 case 0xF5: { /* FPREM1 -- IEEE compliant */
4400 IRTemp a1
= newTemp(Ity_F64
);
4401 IRTemp a2
= newTemp(Ity_F64
);
4403 /* Do FPREM1 twice, once to get the remainder, and once
4404 to get the C3210 flag values. */
4405 assign( a1
, get_ST(0) );
4406 assign( a2
, get_ST(1) );
4409 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4413 triop(Iop_PRem1C3210F64
,
4414 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4420 case 0xF7: /* FINCSTP */
4422 put_ftop( binop(Iop_Add32
, get_ftop(), mkU32(1)) );
4425 case 0xF8: { /* FPREM -- not IEEE compliant */
4426 IRTemp a1
= newTemp(Ity_F64
);
4427 IRTemp a2
= newTemp(Ity_F64
);
4429 /* Do FPREM twice, once to get the remainder, and once
4430 to get the C3210 flag values. */
4431 assign( a1
, get_ST(0) );
4432 assign( a2
, get_ST(1) );
4435 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4439 triop(Iop_PRemC3210F64
,
4440 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4446 case 0xF9: /* FYL2XP1 */
4449 triop(Iop_Yl2xp1F64
,
4450 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4456 case 0xFA: /* FSQRT */
4460 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4464 case 0xFB: { /* FSINCOS */
4466 IRTemp argD
= newTemp(Ity_F64
);
4467 assign(argD
, get_ST(0));
4468 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
4469 IRTemp resD
= newTemp(Ity_F64
);
4474 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4478 put_ST_UNCHECKED(0, mkexpr(resD
));
4479 /* Conditionally push the cos value on the stack, if
4480 the arg is in range */
4481 maybe_fp_push(argOK
);
4482 maybe_put_ST(argOK
, 0,
4484 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4486 set_C2( binop(Iop_Xor32
,
4487 unop(Iop_1Uto32
, mkexpr(argOK
)),
4492 case 0xFC: /* FRNDINT */
4495 binop(Iop_RoundF64toInt
, get_roundingmode(), get_ST(0)) );
4498 case 0xFD: /* FSCALE */
4502 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4507 case 0xFE: /* FSIN */
4508 case 0xFF: { /* FCOS */
4509 Bool isSIN
= modrm
== 0xFE;
4510 DIP("%s\n", isSIN
? "fsin" : "fcos");
4511 IRTemp argD
= newTemp(Ity_F64
);
4512 assign(argD
, get_ST(0));
4513 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
4514 IRTemp resD
= newTemp(Ity_F64
);
4518 binop(isSIN
? Iop_SinF64
: Iop_CosF64
,
4519 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4523 put_ST_UNCHECKED(0, mkexpr(resD
));
4524 set_C2( binop(Iop_Xor32
,
4525 unop(Iop_1Uto32
, mkexpr(argOK
)),
4536 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
4538 if (first_opcode
== 0xDA) {
4542 /* bits 5,4,3 are an opcode extension, and the modRM also
4543 specifies an address. */
4545 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
4547 switch (gregOfRM(modrm
)) {
4549 case 0: /* FIADD m32int */ /* ST(0) += m32int */
4550 DIP("fiaddl %s\n", dis_buf
);
4554 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
4555 DIP("fimull %s\n", dis_buf
);
4559 case 2: /* FICOM m32int */
4560 DIP("ficoml %s\n", dis_buf
);
4561 /* This forces C1 to zero, which isn't right. */
4568 loadLE(Ity_I32
,mkexpr(addr
)))),
4574 case 3: /* FICOMP m32int */
4575 DIP("ficompl %s\n", dis_buf
);
4576 /* This forces C1 to zero, which isn't right. */
4583 loadLE(Ity_I32
,mkexpr(addr
)))),
4590 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
4591 DIP("fisubl %s\n", dis_buf
);
4595 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
4596 DIP("fisubrl %s\n", dis_buf
);
4600 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
4601 DIP("fidivl %s\n", dis_buf
);
4605 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
4606 DIP("fidivrl %s\n", dis_buf
);
4613 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4616 loadLE(Ity_I32
, mkexpr(addr
)))));
4622 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4624 loadLE(Ity_I32
, mkexpr(addr
))),
4629 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
4630 vex_printf("first_opcode == 0xDA\n");
4639 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
4640 r_src
= (UInt
)modrm
- 0xC0;
4641 DIP("fcmovb %%st(%u), %%st(0)\n", r_src
);
4644 mk_x86g_calculate_condition(X86CondB
),
4645 get_ST(r_src
), get_ST(0)) );
4648 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
4649 r_src
= (UInt
)modrm
- 0xC8;
4650 DIP("fcmovz %%st(%u), %%st(0)\n", r_src
);
4653 mk_x86g_calculate_condition(X86CondZ
),
4654 get_ST(r_src
), get_ST(0)) );
4657 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
4658 r_src
= (UInt
)modrm
- 0xD0;
4659 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src
);
4662 mk_x86g_calculate_condition(X86CondBE
),
4663 get_ST(r_src
), get_ST(0)) );
4666 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
4667 r_src
= (UInt
)modrm
- 0xD8;
4668 DIP("fcmovu %%st(%u), %%st(0)\n", r_src
);
4671 mk_x86g_calculate_condition(X86CondP
),
4672 get_ST(r_src
), get_ST(0)) );
4675 case 0xE9: /* FUCOMPP %st(0),%st(1) */
4676 DIP("fucompp %%st(0),%%st(1)\n");
4677 /* This forces C1 to zero, which isn't right. */
4681 binop(Iop_CmpF64
, get_ST(0), get_ST(1)),
4696 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
4698 if (first_opcode
== 0xDB) {
4701 /* bits 5,4,3 are an opcode extension, and the modRM also
4702 specifies an address. */
4703 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
4706 switch (gregOfRM(modrm
)) {
4708 case 0: /* FILD m32int */
4709 DIP("fildl %s\n", dis_buf
);
4711 put_ST(0, unop(Iop_I32StoF64
,
4712 loadLE(Ity_I32
, mkexpr(addr
))));
4715 case 1: /* FISTTPL m32 (SSE3) */
4716 DIP("fisttpl %s\n", dis_buf
);
4717 storeLE( mkexpr(addr
),
4718 binop(Iop_F64toI32S
, mkU32(Irrm_ZERO
), get_ST(0)) );
4722 case 2: /* FIST m32 */
4723 DIP("fistl %s\n", dis_buf
);
4724 storeLE( mkexpr(addr
),
4725 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) );
4728 case 3: /* FISTP m32 */
4729 DIP("fistpl %s\n", dis_buf
);
4730 storeLE( mkexpr(addr
),
4731 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) );
4735 case 5: { /* FLD extended-real */
4736 /* Uses dirty helper:
4737 ULong x86g_loadF80le ( UInt )
4738 addr holds the address. First, do a dirty call to
4739 get hold of the data. */
4740 IRTemp val
= newTemp(Ity_I64
);
4741 IRExpr
** args
= mkIRExprVec_1 ( mkexpr(addr
) );
4743 IRDirty
* d
= unsafeIRDirty_1_N (
4746 "x86g_dirtyhelper_loadF80le",
4747 &x86g_dirtyhelper_loadF80le
,
4750 /* declare that we're reading memory */
4752 d
->mAddr
= mkexpr(addr
);
4755 /* execute the dirty call, dumping the result in val. */
4756 stmt( IRStmt_Dirty(d
) );
4758 put_ST(0, unop(Iop_ReinterpI64asF64
, mkexpr(val
)));
4760 DIP("fldt %s\n", dis_buf
);
4764 case 7: { /* FSTP extended-real */
4765 /* Uses dirty helper: void x86g_storeF80le ( UInt, ULong ) */
4767 = mkIRExprVec_2( mkexpr(addr
),
4768 unop(Iop_ReinterpF64asI64
, get_ST(0)) );
4770 IRDirty
* d
= unsafeIRDirty_0_N (
4772 "x86g_dirtyhelper_storeF80le",
4773 &x86g_dirtyhelper_storeF80le
,
4776 /* declare we're writing memory */
4778 d
->mAddr
= mkexpr(addr
);
4781 /* execute the dirty call. */
4782 stmt( IRStmt_Dirty(d
) );
4785 DIP("fstpt\n %s", dis_buf
);
4790 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
4791 vex_printf("first_opcode == 0xDB\n");
4800 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
4801 r_src
= (UInt
)modrm
- 0xC0;
4802 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src
);
4805 mk_x86g_calculate_condition(X86CondNB
),
4806 get_ST(r_src
), get_ST(0)) );
4809 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
4810 r_src
= (UInt
)modrm
- 0xC8;
4811 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src
);
4814 mk_x86g_calculate_condition(X86CondNZ
),
4815 get_ST(r_src
), get_ST(0)) );
4818 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
4819 r_src
= (UInt
)modrm
- 0xD0;
4820 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src
);
4823 mk_x86g_calculate_condition(X86CondNBE
),
4824 get_ST(r_src
), get_ST(0)) );
4827 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
4828 r_src
= (UInt
)modrm
- 0xD8;
4829 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src
);
4832 mk_x86g_calculate_condition(X86CondNP
),
4833 get_ST(r_src
), get_ST(0)) );
4841 /* Uses dirty helper:
4842 void x86g_do_FINIT ( VexGuestX86State* ) */
4843 IRDirty
* d
= unsafeIRDirty_0_N (
4845 "x86g_dirtyhelper_FINIT",
4846 &x86g_dirtyhelper_FINIT
,
4847 mkIRExprVec_1(IRExpr_GSPTR())
4850 /* declare we're writing guest state */
4852 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
4854 d
->fxState
[0].fx
= Ifx_Write
;
4855 d
->fxState
[0].offset
= OFFB_FTOP
;
4856 d
->fxState
[0].size
= sizeof(UInt
);
4858 d
->fxState
[1].fx
= Ifx_Write
;
4859 d
->fxState
[1].offset
= OFFB_FPREGS
;
4860 d
->fxState
[1].size
= 8 * sizeof(ULong
);
4862 d
->fxState
[2].fx
= Ifx_Write
;
4863 d
->fxState
[2].offset
= OFFB_FPTAGS
;
4864 d
->fxState
[2].size
= 8 * sizeof(UChar
);
4866 d
->fxState
[3].fx
= Ifx_Write
;
4867 d
->fxState
[3].offset
= OFFB_FPROUND
;
4868 d
->fxState
[3].size
= sizeof(UInt
);
4870 d
->fxState
[4].fx
= Ifx_Write
;
4871 d
->fxState
[4].offset
= OFFB_FC3210
;
4872 d
->fxState
[4].size
= sizeof(UInt
);
4874 stmt( IRStmt_Dirty(d
) );
4880 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
4881 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xE8, False
);
4884 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
4885 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xF0, False
);
4894 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
4896 if (first_opcode
== 0xDC) {
4899 /* bits 5,4,3 are an opcode extension, and the modRM also
4900 specifies an address. */
4901 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
4904 switch (gregOfRM(modrm
)) {
4906 case 0: /* FADD double-real */
4907 fp_do_op_mem_ST_0 ( addr
, "add", dis_buf
, Iop_AddF64
, True
);
4910 case 1: /* FMUL double-real */
4911 fp_do_op_mem_ST_0 ( addr
, "mul", dis_buf
, Iop_MulF64
, True
);
4914 case 2: /* FCOM double-real */
4915 DIP("fcoml %s\n", dis_buf
);
4916 /* This forces C1 to zero, which isn't right. */
4922 loadLE(Ity_F64
,mkexpr(addr
))),
4928 case 3: /* FCOMP double-real */
4929 DIP("fcompl %s\n", dis_buf
);
4930 /* This forces C1 to zero, which isn't right. */
4936 loadLE(Ity_F64
,mkexpr(addr
))),
4943 case 4: /* FSUB double-real */
4944 fp_do_op_mem_ST_0 ( addr
, "sub", dis_buf
, Iop_SubF64
, True
);
4947 case 5: /* FSUBR double-real */
4948 fp_do_oprev_mem_ST_0 ( addr
, "subr", dis_buf
, Iop_SubF64
, True
);
4951 case 6: /* FDIV double-real */
4952 fp_do_op_mem_ST_0 ( addr
, "div", dis_buf
, Iop_DivF64
, True
);
4955 case 7: /* FDIVR double-real */
4956 fp_do_oprev_mem_ST_0 ( addr
, "divr", dis_buf
, Iop_DivF64
, True
);
4960 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
4961 vex_printf("first_opcode == 0xDC\n");
4970 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
4971 fp_do_op_ST_ST ( "add", Iop_AddF64
, 0, modrm
- 0xC0, False
);
4974 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
4975 fp_do_op_ST_ST ( "mul", Iop_MulF64
, 0, modrm
- 0xC8, False
);
4978 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
4979 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, 0, modrm
- 0xE0, False
);
4982 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
4983 fp_do_op_ST_ST ( "sub", Iop_SubF64
, 0, modrm
- 0xE8, False
);
4986 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
4987 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, 0, modrm
- 0xF0, False
);
4990 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
4991 fp_do_op_ST_ST ( "div", Iop_DivF64
, 0, modrm
- 0xF8, False
);
5001 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
5003 if (first_opcode
== 0xDD) {
5007 /* bits 5,4,3 are an opcode extension, and the modRM also
5008 specifies an address. */
5009 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5012 switch (gregOfRM(modrm
)) {
5014 case 0: /* FLD double-real */
5015 DIP("fldl %s\n", dis_buf
);
5017 put_ST(0, loadLE(Ity_F64
, mkexpr(addr
)));
5020 case 1: /* FISTTPQ m64 (SSE3) */
5021 DIP("fistppll %s\n", dis_buf
);
5022 storeLE( mkexpr(addr
),
5023 binop(Iop_F64toI64S
, mkU32(Irrm_ZERO
), get_ST(0)) );
5027 case 2: /* FST double-real */
5028 DIP("fstl %s\n", dis_buf
);
5029 storeLE(mkexpr(addr
), get_ST(0));
5032 case 3: /* FSTP double-real */
5033 DIP("fstpl %s\n", dis_buf
);
5034 storeLE(mkexpr(addr
), get_ST(0));
5038 case 4: { /* FRSTOR m108 */
5039 /* Uses dirty helper:
5040 VexEmNote x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */
5041 IRTemp ew
= newTemp(Ity_I32
);
5042 IRDirty
* d
= unsafeIRDirty_0_N (
5044 "x86g_dirtyhelper_FRSTOR",
5045 &x86g_dirtyhelper_FRSTOR
,
5046 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
5049 /* declare we're reading memory */
5051 d
->mAddr
= mkexpr(addr
);
5054 /* declare we're writing guest state */
5056 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
5058 d
->fxState
[0].fx
= Ifx_Write
;
5059 d
->fxState
[0].offset
= OFFB_FTOP
;
5060 d
->fxState
[0].size
= sizeof(UInt
);
5062 d
->fxState
[1].fx
= Ifx_Write
;
5063 d
->fxState
[1].offset
= OFFB_FPREGS
;
5064 d
->fxState
[1].size
= 8 * sizeof(ULong
);
5066 d
->fxState
[2].fx
= Ifx_Write
;
5067 d
->fxState
[2].offset
= OFFB_FPTAGS
;
5068 d
->fxState
[2].size
= 8 * sizeof(UChar
);
5070 d
->fxState
[3].fx
= Ifx_Write
;
5071 d
->fxState
[3].offset
= OFFB_FPROUND
;
5072 d
->fxState
[3].size
= sizeof(UInt
);
5074 d
->fxState
[4].fx
= Ifx_Write
;
5075 d
->fxState
[4].offset
= OFFB_FC3210
;
5076 d
->fxState
[4].size
= sizeof(UInt
);
5078 stmt( IRStmt_Dirty(d
) );
5080 /* ew contains any emulation warning we may need to
5081 issue. If needed, side-exit to the next insn,
5082 reporting the warning, so that Valgrind's dispatcher
5083 sees the warning. */
5084 put_emwarn( mkexpr(ew
) );
5087 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
5089 IRConst_U32( ((Addr32
)guest_EIP_bbstart
)+delta
),
5094 DIP("frstor %s\n", dis_buf
);
5098 case 6: { /* FNSAVE m108 */
5099 /* Uses dirty helper:
5100 void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */
5101 IRDirty
* d
= unsafeIRDirty_0_N (
5103 "x86g_dirtyhelper_FSAVE",
5104 &x86g_dirtyhelper_FSAVE
,
5105 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
5107 /* declare we're writing memory */
5109 d
->mAddr
= mkexpr(addr
);
5112 /* declare we're reading guest state */
5114 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
5116 d
->fxState
[0].fx
= Ifx_Read
;
5117 d
->fxState
[0].offset
= OFFB_FTOP
;
5118 d
->fxState
[0].size
= sizeof(UInt
);
5120 d
->fxState
[1].fx
= Ifx_Read
;
5121 d
->fxState
[1].offset
= OFFB_FPREGS
;
5122 d
->fxState
[1].size
= 8 * sizeof(ULong
);
5124 d
->fxState
[2].fx
= Ifx_Read
;
5125 d
->fxState
[2].offset
= OFFB_FPTAGS
;
5126 d
->fxState
[2].size
= 8 * sizeof(UChar
);
5128 d
->fxState
[3].fx
= Ifx_Read
;
5129 d
->fxState
[3].offset
= OFFB_FPROUND
;
5130 d
->fxState
[3].size
= sizeof(UInt
);
5132 d
->fxState
[4].fx
= Ifx_Read
;
5133 d
->fxState
[4].offset
= OFFB_FC3210
;
5134 d
->fxState
[4].size
= sizeof(UInt
);
5136 stmt( IRStmt_Dirty(d
) );
5138 DIP("fnsave %s\n", dis_buf
);
5142 case 7: { /* FNSTSW m16 */
5143 IRExpr
* sw
= get_FPU_sw();
5144 vassert(typeOfIRExpr(irsb
->tyenv
, sw
) == Ity_I16
);
5145 storeLE( mkexpr(addr
), sw
);
5146 DIP("fnstsw %s\n", dis_buf
);
5151 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
5152 vex_printf("first_opcode == 0xDD\n");
5159 case 0xC0 ... 0xC7: /* FFREE %st(?) */
5160 r_dst
= (UInt
)modrm
- 0xC0;
5161 DIP("ffree %%st(%u)\n", r_dst
);
5162 put_ST_TAG ( r_dst
, mkU8(0) );
5165 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
5166 r_dst
= (UInt
)modrm
- 0xD0;
5167 DIP("fst %%st(0),%%st(%u)\n", r_dst
);
5168 /* P4 manual says: "If the destination operand is a
5169 non-empty register, the invalid-operation exception
5170 is not generated. Hence put_ST_UNCHECKED. */
5171 put_ST_UNCHECKED(r_dst
, get_ST(0));
5174 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
5175 r_dst
= (UInt
)modrm
- 0xD8;
5176 DIP("fstp %%st(0),%%st(%u)\n", r_dst
);
5177 /* P4 manual says: "If the destination operand is a
5178 non-empty register, the invalid-operation exception
5179 is not generated. Hence put_ST_UNCHECKED. */
5180 put_ST_UNCHECKED(r_dst
, get_ST(0));
5184 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
5185 r_dst
= (UInt
)modrm
- 0xE0;
5186 DIP("fucom %%st(0),%%st(%u)\n", r_dst
);
5187 /* This forces C1 to zero, which isn't right. */
5191 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
5197 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
5198 r_dst
= (UInt
)modrm
- 0xE8;
5199 DIP("fucomp %%st(0),%%st(%u)\n", r_dst
);
5200 /* This forces C1 to zero, which isn't right. */
5204 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
5217 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
5219 if (first_opcode
== 0xDE) {
5223 /* bits 5,4,3 are an opcode extension, and the modRM also
5224 specifies an address. */
5226 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5229 switch (gregOfRM(modrm
)) {
5231 case 0: /* FIADD m16int */ /* ST(0) += m16int */
5232 DIP("fiaddw %s\n", dis_buf
);
5236 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
5237 DIP("fimulw %s\n", dis_buf
);
5241 case 2: /* FICOM m16int */
5242 DIP("ficomw %s\n", dis_buf
);
5243 /* This forces C1 to zero, which isn't right. */
5251 loadLE(Ity_I16
,mkexpr(addr
))))),
5257 case 3: /* FICOMP m16int */
5258 DIP("ficompw %s\n", dis_buf
);
5259 /* This forces C1 to zero, which isn't right. */
5267 loadLE(Ity_I16
,mkexpr(addr
))))),
5274 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
5275 DIP("fisubw %s\n", dis_buf
);
5279 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
5280 DIP("fisubrw %s\n", dis_buf
);
5284 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
5285 DIP("fisubw %s\n", dis_buf
);
5289 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
5290 DIP("fidivrw %s\n", dis_buf
);
5297 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5301 loadLE(Ity_I16
, mkexpr(addr
))))));
5307 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5310 loadLE(Ity_I16
, mkexpr(addr
)))),
5315 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
5316 vex_printf("first_opcode == 0xDE\n");
5325 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
5326 fp_do_op_ST_ST ( "add", Iop_AddF64
, 0, modrm
- 0xC0, True
);
5329 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
5330 fp_do_op_ST_ST ( "mul", Iop_MulF64
, 0, modrm
- 0xC8, True
);
5333 case 0xD9: /* FCOMPP %st(0),%st(1) */
5334 DIP("fuompp %%st(0),%%st(1)\n");
5335 /* This forces C1 to zero, which isn't right. */
5339 binop(Iop_CmpF64
, get_ST(0), get_ST(1)),
5347 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
5348 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, 0, modrm
- 0xE0, True
);
5351 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
5352 fp_do_op_ST_ST ( "sub", Iop_SubF64
, 0, modrm
- 0xE8, True
);
5355 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
5356 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, 0, modrm
- 0xF0, True
);
5359 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
5360 fp_do_op_ST_ST ( "div", Iop_DivF64
, 0, modrm
- 0xF8, True
);
5370 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
5372 if (first_opcode
== 0xDF) {
5376 /* bits 5,4,3 are an opcode extension, and the modRM also
5377 specifies an address. */
5378 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5381 switch (gregOfRM(modrm
)) {
5383 case 0: /* FILD m16int */
5384 DIP("fildw %s\n", dis_buf
);
5386 put_ST(0, unop(Iop_I32StoF64
,
5388 loadLE(Ity_I16
, mkexpr(addr
)))));
5391 case 1: /* FISTTPS m16 (SSE3) */
5392 DIP("fisttps %s\n", dis_buf
);
5393 storeLE( mkexpr(addr
),
5394 binop(Iop_F64toI16S
, mkU32(Irrm_ZERO
), get_ST(0)) );
5398 case 2: /* FIST m16 */
5399 DIP("fistp %s\n", dis_buf
);
5400 storeLE( mkexpr(addr
),
5401 binop(Iop_F64toI16S
, get_roundingmode(), get_ST(0)) );
5404 case 3: /* FISTP m16 */
5405 DIP("fistps %s\n", dis_buf
);
5406 storeLE( mkexpr(addr
),
5407 binop(Iop_F64toI16S
, get_roundingmode(), get_ST(0)) );
5411 case 5: /* FILD m64 */
5412 DIP("fildll %s\n", dis_buf
);
5414 put_ST(0, binop(Iop_I64StoF64
,
5416 loadLE(Ity_I64
, mkexpr(addr
))));
5419 case 7: /* FISTP m64 */
5420 DIP("fistpll %s\n", dis_buf
);
5421 storeLE( mkexpr(addr
),
5422 binop(Iop_F64toI64S
, get_roundingmode(), get_ST(0)) );
5427 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
5428 vex_printf("first_opcode == 0xDF\n");
5437 case 0xC0: /* FFREEP %st(0) */
5438 DIP("ffreep %%st(%d)\n", 0);
5439 put_ST_TAG ( 0, mkU8(0) );
5443 case 0xE0: /* FNSTSW %ax */
5444 DIP("fnstsw %%ax\n");
5445 /* Get the FPU status word value and dump it in %AX. */
5447 /* The obvious thing to do is simply dump the 16-bit
5448 status word value in %AX. However, due to a
5449 limitation in Memcheck's origin tracking
5450 machinery, this causes Memcheck not to track the
5451 origin of any undefinedness into %AH (only into
5452 %AL/%AX/%EAX), which means origins are lost in
5453 the sequence "fnstsw %ax; test $M,%ah; jcond .." */
5454 putIReg(2, R_EAX
, get_FPU_sw());
5456 /* So a somewhat lame kludge is to make it very
5457 clear to Memcheck that the value is written to
5458 both %AH and %AL. This generates marginally
5459 worse code, but I don't think it matters much. */
5460 IRTemp t16
= newTemp(Ity_I16
);
5461 assign(t16
, get_FPU_sw());
5462 putIReg( 1, R_AL
, unop(Iop_16to8
, mkexpr(t16
)) );
5463 putIReg( 1, R_AH
, unop(Iop_16HIto8
, mkexpr(t16
)) );
5467 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
5468 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xE8, True
);
5471 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
5472 /* not really right since COMIP != UCOMIP */
5473 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xF0, True
);
5484 vpanic("dis_FPU(x86): invalid primary opcode");
5495 /*------------------------------------------------------------*/
5497 /*--- MMX INSTRUCTIONS ---*/
5499 /*------------------------------------------------------------*/
5501 /* Effect of MMX insns on x87 FPU state (table 11-2 of
5502 IA32 arch manual, volume 3):
5504 Read from, or write to MMX register (viz, any insn except EMMS):
5505 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
5506 * FP stack pointer set to zero
   EMMS:
      * All tags set to Invalid (empty) -- FPTAGS[i] := zero
      * FP stack pointer set to zero
5513 static void do_MMX_preamble ( void )
5516 IRRegArray
* descr
= mkIRRegArray( OFFB_FPTAGS
, Ity_I8
, 8 );
5517 IRExpr
* zero
= mkU32(0);
5518 IRExpr
* tag1
= mkU8(1);
5520 for (i
= 0; i
< 8; i
++)
5521 stmt( IRStmt_PutI( mkIRPutI(descr
, zero
, i
, tag1
) ) );
5524 static void do_EMMS_preamble ( void )
5527 IRRegArray
* descr
= mkIRRegArray( OFFB_FPTAGS
, Ity_I8
, 8 );
5528 IRExpr
* zero
= mkU32(0);
5529 IRExpr
* tag0
= mkU8(0);
5531 for (i
= 0; i
< 8; i
++)
5532 stmt( IRStmt_PutI( mkIRPutI(descr
, zero
, i
, tag0
) ) );
5536 static IRExpr
* getMMXReg ( UInt archreg
)
5538 vassert(archreg
< 8);
5539 return IRExpr_Get( OFFB_FPREGS
+ 8 * archreg
, Ity_I64
);
5543 static void putMMXReg ( UInt archreg
, IRExpr
* e
)
5545 vassert(archreg
< 8);
5546 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I64
);
5547 stmt( IRStmt_Put( OFFB_FPREGS
+ 8 * archreg
, e
) );
5551 /* Helper for non-shift MMX insns. Note this is incomplete in the
5552 sense that it does not first call do_MMX_preamble() -- that is the
5553 responsibility of its caller. */
5556 UInt
dis_MMXop_regmem_to_reg ( UChar sorb
,
5560 Bool show_granularity
)
5563 UChar modrm
= getIByte(delta
);
5564 Bool isReg
= epartIsReg(modrm
);
5565 IRExpr
* argL
= NULL
;
5566 IRExpr
* argR
= NULL
;
5567 IRExpr
* argG
= NULL
;
5568 IRExpr
* argE
= NULL
;
5569 IRTemp res
= newTemp(Ity_I64
);
5572 IROp op
= Iop_INVALID
;
5575 const HChar
* hName
= NULL
;
5577 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
5580 /* Original MMX ones */
5581 case 0xFC: op
= Iop_Add8x8
; break;
5582 case 0xFD: op
= Iop_Add16x4
; break;
5583 case 0xFE: op
= Iop_Add32x2
; break;
5585 case 0xEC: op
= Iop_QAdd8Sx8
; break;
5586 case 0xED: op
= Iop_QAdd16Sx4
; break;
5588 case 0xDC: op
= Iop_QAdd8Ux8
; break;
5589 case 0xDD: op
= Iop_QAdd16Ux4
; break;
5591 case 0xF8: op
= Iop_Sub8x8
; break;
5592 case 0xF9: op
= Iop_Sub16x4
; break;
5593 case 0xFA: op
= Iop_Sub32x2
; break;
5595 case 0xE8: op
= Iop_QSub8Sx8
; break;
5596 case 0xE9: op
= Iop_QSub16Sx4
; break;
5598 case 0xD8: op
= Iop_QSub8Ux8
; break;
5599 case 0xD9: op
= Iop_QSub16Ux4
; break;
5601 case 0xE5: op
= Iop_MulHi16Sx4
; break;
5602 case 0xD5: op
= Iop_Mul16x4
; break;
5603 case 0xF5: XXX(x86g_calculate_mmx_pmaddwd
); break;
5605 case 0x74: op
= Iop_CmpEQ8x8
; break;
5606 case 0x75: op
= Iop_CmpEQ16x4
; break;
5607 case 0x76: op
= Iop_CmpEQ32x2
; break;
5609 case 0x64: op
= Iop_CmpGT8Sx8
; break;
5610 case 0x65: op
= Iop_CmpGT16Sx4
; break;
5611 case 0x66: op
= Iop_CmpGT32Sx2
; break;
5613 case 0x6B: op
= Iop_QNarrowBin32Sto16Sx4
; eLeft
= True
; break;
5614 case 0x63: op
= Iop_QNarrowBin16Sto8Sx8
; eLeft
= True
; break;
5615 case 0x67: op
= Iop_QNarrowBin16Sto8Ux8
; eLeft
= True
; break;
5617 case 0x68: op
= Iop_InterleaveHI8x8
; eLeft
= True
; break;
5618 case 0x69: op
= Iop_InterleaveHI16x4
; eLeft
= True
; break;
5619 case 0x6A: op
= Iop_InterleaveHI32x2
; eLeft
= True
; break;
5621 case 0x60: op
= Iop_InterleaveLO8x8
; eLeft
= True
; break;
5622 case 0x61: op
= Iop_InterleaveLO16x4
; eLeft
= True
; break;
5623 case 0x62: op
= Iop_InterleaveLO32x2
; eLeft
= True
; break;
5625 case 0xDB: op
= Iop_And64
; break;
5626 case 0xDF: op
= Iop_And64
; invG
= True
; break;
5627 case 0xEB: op
= Iop_Or64
; break;
5628 case 0xEF: /* Possibly do better here if argL and argR are the
5630 op
= Iop_Xor64
; break;
5632 /* Introduced in SSE1 */
5633 case 0xE0: op
= Iop_Avg8Ux8
; break;
5634 case 0xE3: op
= Iop_Avg16Ux4
; break;
5635 case 0xEE: op
= Iop_Max16Sx4
; break;
5636 case 0xDE: op
= Iop_Max8Ux8
; break;
5637 case 0xEA: op
= Iop_Min16Sx4
; break;
5638 case 0xDA: op
= Iop_Min8Ux8
; break;
5639 case 0xE4: op
= Iop_MulHi16Ux4
; break;
5640 case 0xF6: XXX(x86g_calculate_mmx_psadbw
); break;
5642 /* Introduced in SSE2 */
5643 case 0xD4: op
= Iop_Add64
; break;
5644 case 0xFB: op
= Iop_Sub64
; break;
5647 vex_printf("\n0x%x\n", opc
);
5648 vpanic("dis_MMXop_regmem_to_reg");
5653 argG
= getMMXReg(gregOfRM(modrm
));
5655 argG
= unop(Iop_Not64
, argG
);
5659 argE
= getMMXReg(eregOfRM(modrm
));
5662 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5664 argE
= loadLE(Ity_I64
, mkexpr(addr
));
5675 if (op
!= Iop_INVALID
) {
5676 vassert(hName
== NULL
);
5677 vassert(hAddr
== NULL
);
5678 assign(res
, binop(op
, argL
, argR
));
5680 vassert(hName
!= NULL
);
5681 vassert(hAddr
!= NULL
);
5685 0/*regparms*/, hName
, hAddr
,
5686 mkIRExprVec_2( argL
, argR
)
5691 putMMXReg( gregOfRM(modrm
), mkexpr(res
) );
5693 DIP("%s%s %s, %s\n",
5694 name
, show_granularity
? nameMMXGran(opc
& 3) : "",
5695 ( isReg
? nameMMXReg(eregOfRM(modrm
)) : dis_buf
),
5696 nameMMXReg(gregOfRM(modrm
)) );
5702 /* Vector by scalar shift of G by the amount specified at the bottom
5703 of E. This is a straight copy of dis_SSE_shiftG_byE. */
5705 static UInt
dis_MMX_shiftG_byE ( UChar sorb
, Int delta
,
5706 const HChar
* opname
, IROp op
)
5712 UChar rm
= getIByte(delta
);
5713 IRTemp g0
= newTemp(Ity_I64
);
5714 IRTemp g1
= newTemp(Ity_I64
);
5715 IRTemp amt
= newTemp(Ity_I32
);
5716 IRTemp amt8
= newTemp(Ity_I8
);
5718 if (epartIsReg(rm
)) {
5719 assign( amt
, unop(Iop_64to32
, getMMXReg(eregOfRM(rm
))) );
5720 DIP("%s %s,%s\n", opname
,
5721 nameMMXReg(eregOfRM(rm
)),
5722 nameMMXReg(gregOfRM(rm
)) );
5725 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
5726 assign( amt
, loadLE(Ity_I32
, mkexpr(addr
)) );
5727 DIP("%s %s,%s\n", opname
,
5729 nameMMXReg(gregOfRM(rm
)) );
5732 assign( g0
, getMMXReg(gregOfRM(rm
)) );
5733 assign( amt8
, unop(Iop_32to8
, mkexpr(amt
)) );
5735 shl
= shr
= sar
= False
;
5738 case Iop_ShlN16x4
: shl
= True
; size
= 32; break;
5739 case Iop_ShlN32x2
: shl
= True
; size
= 32; break;
5740 case Iop_Shl64
: shl
= True
; size
= 64; break;
5741 case Iop_ShrN16x4
: shr
= True
; size
= 16; break;
5742 case Iop_ShrN32x2
: shr
= True
; size
= 32; break;
5743 case Iop_Shr64
: shr
= True
; size
= 64; break;
5744 case Iop_SarN16x4
: sar
= True
; size
= 16; break;
5745 case Iop_SarN32x2
: sar
= True
; size
= 32; break;
5746 default: vassert(0);
5753 binop(Iop_CmpLT32U
,mkexpr(amt
),mkU32(size
)),
5754 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
5763 binop(Iop_CmpLT32U
,mkexpr(amt
),mkU32(size
)),
5764 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
5765 binop(op
, mkexpr(g0
), mkU8(size
-1))
5773 putMMXReg( gregOfRM(rm
), mkexpr(g1
) );
5778 /* Vector by scalar shift of E by an immediate byte. This is a
5779 straight copy of dis_SSE_shiftE_imm. */
5782 UInt
dis_MMX_shiftE_imm ( Int delta
, const HChar
* opname
, IROp op
)
5785 UChar rm
= getIByte(delta
);
5786 IRTemp e0
= newTemp(Ity_I64
);
5787 IRTemp e1
= newTemp(Ity_I64
);
5789 vassert(epartIsReg(rm
));
5790 vassert(gregOfRM(rm
) == 2
5791 || gregOfRM(rm
) == 4 || gregOfRM(rm
) == 6);
5792 amt
= getIByte(delta
+1);
5794 DIP("%s $%d,%s\n", opname
,
5796 nameMMXReg(eregOfRM(rm
)) );
5798 assign( e0
, getMMXReg(eregOfRM(rm
)) );
5800 shl
= shr
= sar
= False
;
5803 case Iop_ShlN16x4
: shl
= True
; size
= 16; break;
5804 case Iop_ShlN32x2
: shl
= True
; size
= 32; break;
5805 case Iop_Shl64
: shl
= True
; size
= 64; break;
5806 case Iop_SarN16x4
: sar
= True
; size
= 16; break;
5807 case Iop_SarN32x2
: sar
= True
; size
= 32; break;
5808 case Iop_ShrN16x4
: shr
= True
; size
= 16; break;
5809 case Iop_ShrN32x2
: shr
= True
; size
= 32; break;
5810 case Iop_Shr64
: shr
= True
; size
= 64; break;
5811 default: vassert(0);
5815 assign( e1
, amt
>= size
5817 : binop(op
, mkexpr(e0
), mkU8(amt
))
5821 assign( e1
, amt
>= size
5822 ? binop(op
, mkexpr(e0
), mkU8(size
-1))
5823 : binop(op
, mkexpr(e0
), mkU8(amt
))
5830 putMMXReg( eregOfRM(rm
), mkexpr(e1
) );
5835 /* Completely handle all MMX instructions except emms. */
5838 UInt
dis_MMX ( Bool
* decode_ok
, UChar sorb
, Int sz
, Int delta
)
5843 UChar opc
= getIByte(delta
);
5846 /* dis_MMX handles all insns except emms. */
5852 /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/
5854 goto mmx_decode_failure
;
5855 modrm
= getIByte(delta
);
5856 if (epartIsReg(modrm
)) {
5860 binop( Iop_32HLto64
,
5862 getIReg(4, eregOfRM(modrm
)) ) );
5863 DIP("movd %s, %s\n",
5864 nameIReg(4,eregOfRM(modrm
)), nameMMXReg(gregOfRM(modrm
)));
5866 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5870 binop( Iop_32HLto64
,
5872 loadLE(Ity_I32
, mkexpr(addr
)) ) );
5873 DIP("movd %s, %s\n", dis_buf
, nameMMXReg(gregOfRM(modrm
)));
5877 case 0x7E: /* MOVD (src)mmxreg (G), (dst)ireg-or-mem (E) */
5879 goto mmx_decode_failure
;
5880 modrm
= getIByte(delta
);
5881 if (epartIsReg(modrm
)) {
5883 putIReg( 4, eregOfRM(modrm
),
5884 unop(Iop_64to32
, getMMXReg(gregOfRM(modrm
)) ) );
5885 DIP("movd %s, %s\n",
5886 nameMMXReg(gregOfRM(modrm
)), nameIReg(4,eregOfRM(modrm
)));
5888 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5890 storeLE( mkexpr(addr
),
5891 unop(Iop_64to32
, getMMXReg(gregOfRM(modrm
)) ) );
5892 DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm
)), dis_buf
);
5897 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
5899 goto mmx_decode_failure
;
5900 modrm
= getIByte(delta
);
5901 if (epartIsReg(modrm
)) {
5903 putMMXReg( gregOfRM(modrm
), getMMXReg(eregOfRM(modrm
)) );
5904 DIP("movq %s, %s\n",
5905 nameMMXReg(eregOfRM(modrm
)), nameMMXReg(gregOfRM(modrm
)));
5907 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5909 putMMXReg( gregOfRM(modrm
), loadLE(Ity_I64
, mkexpr(addr
)) );
5910 DIP("movq %s, %s\n",
5911 dis_buf
, nameMMXReg(gregOfRM(modrm
)));
5916 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
5918 goto mmx_decode_failure
;
5919 modrm
= getIByte(delta
);
5920 if (epartIsReg(modrm
)) {
5922 putMMXReg( eregOfRM(modrm
), getMMXReg(gregOfRM(modrm
)) );
5923 DIP("movq %s, %s\n",
5924 nameMMXReg(gregOfRM(modrm
)), nameMMXReg(eregOfRM(modrm
)));
5926 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5928 storeLE( mkexpr(addr
), getMMXReg(gregOfRM(modrm
)) );
5929 DIP("mov(nt)q %s, %s\n",
5930 nameMMXReg(gregOfRM(modrm
)), dis_buf
);
5936 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
5938 goto mmx_decode_failure
;
5939 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "padd", True
);
5943 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
5945 goto mmx_decode_failure
;
5946 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "padds", True
);
5950 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
5952 goto mmx_decode_failure
;
5953 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "paddus", True
);
5958 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
5960 goto mmx_decode_failure
;
5961 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "psub", True
);
5965 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
5967 goto mmx_decode_failure
;
5968 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "psubs", True
);
5972 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
5974 goto mmx_decode_failure
;
5975 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "psubus", True
);
5978 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
5980 goto mmx_decode_failure
;
5981 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pmulhw", False
);
5984 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
5986 goto mmx_decode_failure
;
5987 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pmullw", False
);
5990 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
5992 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pmaddwd", False
);
5997 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
5999 goto mmx_decode_failure
;
6000 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pcmpeq", True
);
6005 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
6007 goto mmx_decode_failure
;
6008 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pcmpgt", True
);
6011 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
6013 goto mmx_decode_failure
;
6014 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "packssdw", False
);
6017 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
6019 goto mmx_decode_failure
;
6020 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "packsswb", False
);
6023 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
6025 goto mmx_decode_failure
;
6026 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "packuswb", False
);
6031 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
6033 goto mmx_decode_failure
;
6034 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "punpckh", True
);
6039 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
6041 goto mmx_decode_failure
;
6042 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "punpckl", True
);
6045 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
6047 goto mmx_decode_failure
;
6048 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pand", False
);
6051 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
6053 goto mmx_decode_failure
;
6054 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pandn", False
);
6057 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
6059 goto mmx_decode_failure
;
6060 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "por", False
);
6063 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
6065 goto mmx_decode_failure
;
6066 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pxor", False
);
6069 # define SHIFT_BY_REG(_name,_op) \
6070 delta = dis_MMX_shiftG_byE(sorb, delta, _name, _op); \
6073 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
6074 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4
);
6075 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2
);
6076 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64
);
6078 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
6079 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4
);
6080 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2
);
6081 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64
);
6083 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
6084 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4
);
6085 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2
);
6087 # undef SHIFT_BY_REG
6092 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
6093 UChar byte2
, subopc
;
6095 goto mmx_decode_failure
;
6096 byte2
= getIByte(delta
); /* amode / sub-opcode */
6097 subopc
= toUChar( (byte2
>> 3) & 7 );
6099 # define SHIFT_BY_IMM(_name,_op) \
6100 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
6103 if (subopc
== 2 /*SRL*/ && opc
== 0x71)
6104 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4
);
6105 else if (subopc
== 2 /*SRL*/ && opc
== 0x72)
6106 SHIFT_BY_IMM("psrld", Iop_ShrN32x2
);
6107 else if (subopc
== 2 /*SRL*/ && opc
== 0x73)
6108 SHIFT_BY_IMM("psrlq", Iop_Shr64
);
6110 else if (subopc
== 4 /*SAR*/ && opc
== 0x71)
6111 SHIFT_BY_IMM("psraw", Iop_SarN16x4
);
6112 else if (subopc
== 4 /*SAR*/ && opc
== 0x72)
6113 SHIFT_BY_IMM("psrad", Iop_SarN32x2
);
6115 else if (subopc
== 6 /*SHL*/ && opc
== 0x71)
6116 SHIFT_BY_IMM("psllw", Iop_ShlN16x4
);
6117 else if (subopc
== 6 /*SHL*/ && opc
== 0x72)
6118 SHIFT_BY_IMM("pslld", Iop_ShlN32x2
);
6119 else if (subopc
== 6 /*SHL*/ && opc
== 0x73)
6120 SHIFT_BY_IMM("psllq", Iop_Shl64
);
6122 else goto mmx_decode_failure
;
6124 # undef SHIFT_BY_IMM
6129 IRTemp addr
= newTemp(Ity_I32
);
6130 IRTemp regD
= newTemp(Ity_I64
);
6131 IRTemp regM
= newTemp(Ity_I64
);
6132 IRTemp mask
= newTemp(Ity_I64
);
6133 IRTemp olddata
= newTemp(Ity_I64
);
6134 IRTemp newdata
= newTemp(Ity_I64
);
6136 modrm
= getIByte(delta
);
6137 if (sz
!= 4 || (!epartIsReg(modrm
)))
6138 goto mmx_decode_failure
;
6141 assign( addr
, handleSegOverride( sorb
, getIReg(4, R_EDI
) ));
6142 assign( regM
, getMMXReg( eregOfRM(modrm
) ));
6143 assign( regD
, getMMXReg( gregOfRM(modrm
) ));
6144 assign( mask
, binop(Iop_SarN8x8
, mkexpr(regM
), mkU8(7)) );
6145 assign( olddata
, loadLE( Ity_I64
, mkexpr(addr
) ));
6153 unop(Iop_Not64
, mkexpr(mask
)))) );
6154 storeLE( mkexpr(addr
), mkexpr(newdata
) );
6155 DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm
) ),
6156 nameMMXReg( gregOfRM(modrm
) ) );
6160 /* --- MMX decode failure --- */
6164 return delta
; /* ignored */
6173 /*------------------------------------------------------------*/
6174 /*--- More misc arithmetic and other obscure insns. ---*/
6175 /*------------------------------------------------------------*/
6177 /* Double length left and right shifts. Apparently only required in
6178 v-size (no b- variant). */
6180 UInt
dis_SHLRD_Gv_Ev ( UChar sorb
,
6181 Int delta
, UChar modrm
,
6184 Bool amt_is_literal
,
6185 const HChar
* shift_amt_txt
,
6188 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
6189 for printing it. And eip on entry points at the modrm byte. */
6193 IRType ty
= szToITy(sz
);
6194 IRTemp gsrc
= newTemp(ty
);
6195 IRTemp esrc
= newTemp(ty
);
6196 IRTemp addr
= IRTemp_INVALID
;
6197 IRTemp tmpSH
= newTemp(Ity_I8
);
6198 IRTemp tmpL
= IRTemp_INVALID
;
6199 IRTemp tmpRes
= IRTemp_INVALID
;
6200 IRTemp tmpSubSh
= IRTemp_INVALID
;
6204 IRExpr
* mask
= NULL
;
6206 vassert(sz
== 2 || sz
== 4);
6208 /* The E-part is the destination; this is shifted. The G-part
6209 supplies bits to be shifted into the E-part, but is not
6212 If shifting left, form a double-length word with E at the top
6213 and G at the bottom, and shift this left. The result is then in
6216 If shifting right, form a double-length word with G at the top
6217 and E at the bottom, and shift this right. The result is then
6220 /* Fetch the operands. */
6222 assign( gsrc
, getIReg(sz
, gregOfRM(modrm
)) );
6224 if (epartIsReg(modrm
)) {
6226 assign( esrc
, getIReg(sz
, eregOfRM(modrm
)) );
6227 DIP("sh%cd%c %s, %s, %s\n",
6228 ( left_shift
? 'l' : 'r' ), nameISize(sz
),
6230 nameIReg(sz
, gregOfRM(modrm
)), nameIReg(sz
, eregOfRM(modrm
)));
6232 addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
6234 assign( esrc
, loadLE(ty
, mkexpr(addr
)) );
6235 DIP("sh%cd%c %s, %s, %s\n",
6236 ( left_shift
? 'l' : 'r' ), nameISize(sz
),
6238 nameIReg(sz
, gregOfRM(modrm
)), dis_buf
);
6241 /* Round up the relevant primops. */
6244 tmpL
= newTemp(Ity_I64
);
6245 tmpRes
= newTemp(Ity_I32
);
6246 tmpSubSh
= newTemp(Ity_I32
);
6247 mkpair
= Iop_32HLto64
;
6248 getres
= left_shift
? Iop_64HIto32
: Iop_64to32
;
6249 shift
= left_shift
? Iop_Shl64
: Iop_Shr64
;
6253 tmpL
= newTemp(Ity_I32
);
6254 tmpRes
= newTemp(Ity_I16
);
6255 tmpSubSh
= newTemp(Ity_I16
);
6256 mkpair
= Iop_16HLto32
;
6257 getres
= left_shift
? Iop_32HIto16
: Iop_32to16
;
6258 shift
= left_shift
? Iop_Shl32
: Iop_Shr32
;
6262 /* Do the shift, calculate the subshift value, and set
6265 assign( tmpSH
, binop(Iop_And8
, shift_amt
, mask
) );
6268 assign( tmpL
, binop(mkpair
, mkexpr(esrc
), mkexpr(gsrc
)) );
6270 assign( tmpL
, binop(mkpair
, mkexpr(gsrc
), mkexpr(esrc
)) );
6272 assign( tmpRes
, unop(getres
, binop(shift
, mkexpr(tmpL
), mkexpr(tmpSH
)) ) );
6278 binop(Iop_Sub8
, mkexpr(tmpSH
), mkU8(1) ),
6281 setFlags_DEP1_DEP2_shift ( left_shift
? Iop_Shl32
: Iop_Sar32
,
6282 tmpRes
, tmpSubSh
, ty
, tmpSH
);
6284 /* Put result back. */
6286 if (epartIsReg(modrm
)) {
6287 putIReg(sz
, eregOfRM(modrm
), mkexpr(tmpRes
));
6289 storeLE( mkexpr(addr
), mkexpr(tmpRes
) );
6292 if (amt_is_literal
) delta
++;
6297 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
6300 typedef enum { BtOpNone
, BtOpSet
, BtOpReset
, BtOpComp
} BtOp
;
6302 static const HChar
* nameBtOp ( BtOp op
)
6305 case BtOpNone
: return "";
6306 case BtOpSet
: return "s";
6307 case BtOpReset
: return "r";
6308 case BtOpComp
: return "c";
6309 default: vpanic("nameBtOp(x86)");
6315 UInt
dis_bt_G_E ( const VexAbiInfo
* vbi
,
6316 UChar sorb
, Bool locked
, Int sz
, Int delta
, BtOp op
)
6321 IRTemp t_fetched
, t_bitno0
, t_bitno1
, t_bitno2
, t_addr0
,
6322 t_addr1
, t_esp
, t_mask
, t_new
;
6324 vassert(sz
== 2 || sz
== 4);
6326 t_fetched
= t_bitno0
= t_bitno1
= t_bitno2
6327 = t_addr0
= t_addr1
= t_esp
6328 = t_mask
= t_new
= IRTemp_INVALID
;
6330 t_fetched
= newTemp(Ity_I8
);
6331 t_new
= newTemp(Ity_I8
);
6332 t_bitno0
= newTemp(Ity_I32
);
6333 t_bitno1
= newTemp(Ity_I32
);
6334 t_bitno2
= newTemp(Ity_I8
);
6335 t_addr1
= newTemp(Ity_I32
);
6336 modrm
= getIByte(delta
);
6338 assign( t_bitno0
, widenSto32(getIReg(sz
, gregOfRM(modrm
))) );
6340 if (epartIsReg(modrm
)) {
6342 /* Get it onto the client's stack. */
6343 t_esp
= newTemp(Ity_I32
);
6344 t_addr0
= newTemp(Ity_I32
);
6346 /* For the choice of the value 128, see comment in dis_bt_G_E in
6347 guest_amd64_toIR.c. We point out here only that 128 is
6348 fast-cased in Memcheck and is > 0, so seems like a good
6350 vassert(vbi
->guest_stack_redzone_size
== 0);
6351 assign( t_esp
, binop(Iop_Sub32
, getIReg(4, R_ESP
), mkU32(128)) );
6352 putIReg(4, R_ESP
, mkexpr(t_esp
));
6354 storeLE( mkexpr(t_esp
), getIReg(sz
, eregOfRM(modrm
)) );
6356 /* Make t_addr0 point at it. */
6357 assign( t_addr0
, mkexpr(t_esp
) );
6359 /* Mask out upper bits of the shift amount, since we're doing a
6361 assign( t_bitno1
, binop(Iop_And32
,
6363 mkU32(sz
== 4 ? 31 : 15)) );
6366 t_addr0
= disAMode ( &len
, sorb
, delta
, dis_buf
);
6368 assign( t_bitno1
, mkexpr(t_bitno0
) );
6371 /* At this point: t_addr0 is the address being operated on. If it
6372 was a reg, we will have pushed it onto the client's stack.
6373 t_bitno1 is the bit number, suitably masked in the case of a
6376 /* Now the main sequence. */
6380 binop(Iop_Sar32
, mkexpr(t_bitno1
), mkU8(3))) );
6382 /* t_addr1 now holds effective address */
6386 binop(Iop_And32
, mkexpr(t_bitno1
), mkU32(7))) );
6388 /* t_bitno2 contains offset of bit within byte */
6390 if (op
!= BtOpNone
) {
6391 t_mask
= newTemp(Ity_I8
);
6392 assign( t_mask
, binop(Iop_Shl8
, mkU8(1), mkexpr(t_bitno2
)) );
6395 /* t_mask is now a suitable byte mask */
6397 assign( t_fetched
, loadLE(Ity_I8
, mkexpr(t_addr1
)) );
6399 if (op
!= BtOpNone
) {
6403 binop(Iop_Or8
, mkexpr(t_fetched
), mkexpr(t_mask
)) );
6407 binop(Iop_Xor8
, mkexpr(t_fetched
), mkexpr(t_mask
)) );
6411 binop(Iop_And8
, mkexpr(t_fetched
),
6412 unop(Iop_Not8
, mkexpr(t_mask
))) );
6415 vpanic("dis_bt_G_E(x86)");
6417 if (locked
&& !epartIsReg(modrm
)) {
6418 casLE( mkexpr(t_addr1
), mkexpr(t_fetched
)/*expd*/,
6419 mkexpr(t_new
)/*new*/,
6420 guest_EIP_curr_instr
);
6422 storeLE( mkexpr(t_addr1
), mkexpr(t_new
) );
6426 /* Side effect done; now get selected bit into Carry flag */
6427 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
6428 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
6429 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
6434 unop(Iop_8Uto32
, mkexpr(t_fetched
)),
6438 /* Set NDEP even though it isn't used. This makes redundant-PUT
6439 elimination of previous stores to this field work better. */
6440 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
6442 /* Move reg operand from stack back to reg */
6443 if (epartIsReg(modrm
)) {
6444 /* t_esp still points at it. */
6445 putIReg(sz
, eregOfRM(modrm
), loadLE(szToITy(sz
), mkexpr(t_esp
)) );
6446 putIReg(4, R_ESP
, binop(Iop_Add32
, mkexpr(t_esp
), mkU32(128)) );
6449 DIP("bt%s%c %s, %s\n",
6450 nameBtOp(op
), nameISize(sz
), nameIReg(sz
, gregOfRM(modrm
)),
6451 ( epartIsReg(modrm
) ? nameIReg(sz
, eregOfRM(modrm
)) : dis_buf
) );
6458 /* Handle BSF/BSR. Only v-size seems necessary. */
6460 UInt
dis_bs_E_G ( UChar sorb
, Int sz
, Int delta
, Bool fwds
)
6466 IRType ty
= szToITy(sz
);
6467 IRTemp src
= newTemp(ty
);
6468 IRTemp dst
= newTemp(ty
);
6470 IRTemp src32
= newTemp(Ity_I32
);
6471 IRTemp dst32
= newTemp(Ity_I32
);
6472 IRTemp srcB
= newTemp(Ity_I1
);
6474 vassert(sz
== 4 || sz
== 2);
6476 modrm
= getIByte(delta
);
6478 isReg
= epartIsReg(modrm
);
6481 assign( src
, getIReg(sz
, eregOfRM(modrm
)) );
6484 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
6486 assign( src
, loadLE(ty
, mkexpr(addr
)) );
6489 DIP("bs%c%c %s, %s\n",
6490 fwds
? 'f' : 'r', nameISize(sz
),
6491 ( isReg
? nameIReg(sz
, eregOfRM(modrm
)) : dis_buf
),
6492 nameIReg(sz
, gregOfRM(modrm
)));
6494 /* Generate a bool expression which is zero iff the original is
6495 zero, and nonzero otherwise. Ask for a CmpNE version which, if
6496 instrumented by Memcheck, is instrumented expensively, since
6497 this may be used on the output of a preceding movmskb insn,
6498 which has been known to be partially defined, and in need of
6499 careful handling. */
6500 assign( srcB
, binop(mkSizedOp(ty
,Iop_ExpCmpNE8
),
6501 mkexpr(src
), mkU(ty
,0)) );
6503 /* Flags: Z is 1 iff source value is zero. All others
6504 are undefined -- we force them to zero. */
6505 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
6506 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
6509 IRExpr_ITE( mkexpr(srcB
),
6513 mkU32(X86G_CC_MASK_Z
)
6516 /* Set NDEP even though it isn't used. This makes redundant-PUT
6517 elimination of previous stores to this field work better. */
6518 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
6520 /* Result: iff source value is zero, we can't use
6521 Iop_Clz32/Iop_Ctz32 as they have no defined result in that case.
6522 But anyway, Intel x86 semantics say the result is undefined in
6523 such situations. Hence handle the zero case specially. */
6525 /* Bleh. What we compute:
6527 bsf32: if src == 0 then 0 else Ctz32(src)
6528 bsr32: if src == 0 then 0 else 31 - Clz32(src)
6530 bsf16: if src == 0 then 0 else Ctz32(16Uto32(src))
6531 bsr16: if src == 0 then 0 else 31 - Clz32(16Uto32(src))
6533 First, widen src to 32 bits if it is not already.
6535 Postscript 15 Oct 04: it seems that at least VIA Nehemiah leaves the
6536 dst register unchanged when src == 0. Hence change accordingly.
6539 assign( src32
, unop(Iop_16Uto32
, mkexpr(src
)) );
6541 assign( src32
, mkexpr(src
) );
6543 /* The main computation, guarding against zero. */
6548 fwds
? unop(Iop_Ctz32
, mkexpr(src32
))
6551 unop(Iop_Clz32
, mkexpr(src32
))),
6552 /* src == 0 -- leave dst unchanged */
6553 widenUto32( getIReg( sz
, gregOfRM(modrm
) ) )
6558 assign( dst
, unop(Iop_32to16
, mkexpr(dst32
)) );
6560 assign( dst
, mkexpr(dst32
) );
6562 /* dump result back */
6563 putIReg( sz
, gregOfRM(modrm
), mkexpr(dst
) );
6570 void codegen_xchg_eAX_Reg ( Int sz
, Int reg
)
6572 IRType ty
= szToITy(sz
);
6573 IRTemp t1
= newTemp(ty
);
6574 IRTemp t2
= newTemp(ty
);
6575 vassert(sz
== 2 || sz
== 4);
6576 assign( t1
, getIReg(sz
, R_EAX
) );
6577 assign( t2
, getIReg(sz
, reg
) );
6578 putIReg( sz
, R_EAX
, mkexpr(t2
) );
6579 putIReg( sz
, reg
, mkexpr(t1
) );
6580 DIP("xchg%c %s, %s\n",
6581 nameISize(sz
), nameIReg(sz
, R_EAX
), nameIReg(sz
, reg
));
6586 void codegen_SAHF ( void )
6588 /* Set the flags to:
6589 (x86g_calculate_flags_all() & X86G_CC_MASK_O) -- retain the old O flag
6590 | (%AH & (X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
6591 |X86G_CC_MASK_P|X86G_CC_MASK_C)
6593 UInt mask_SZACP
= X86G_CC_MASK_S
|X86G_CC_MASK_Z
|X86G_CC_MASK_A
6594 |X86G_CC_MASK_C
|X86G_CC_MASK_P
;
6595 IRTemp oldflags
= newTemp(Ity_I32
);
6596 assign( oldflags
, mk_x86g_calculate_eflags_all() );
6597 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
6598 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
6599 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
6600 stmt( IRStmt_Put( OFFB_CC_DEP1
,
6602 binop(Iop_And32
, mkexpr(oldflags
), mkU32(X86G_CC_MASK_O
)),
6604 binop(Iop_Shr32
, getIReg(4, R_EAX
), mkU8(8)),
6608 /* Set NDEP even though it isn't used. This makes redundant-PUT
6609 elimination of previous stores to this field work better. */
6610 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
6615 void codegen_LAHF ( void )
6617 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
6618 IRExpr
* eax_with_hole
;
6621 UInt mask_SZACP
= X86G_CC_MASK_S
|X86G_CC_MASK_Z
|X86G_CC_MASK_A
6622 |X86G_CC_MASK_C
|X86G_CC_MASK_P
;
6624 IRTemp flags
= newTemp(Ity_I32
);
6625 assign( flags
, mk_x86g_calculate_eflags_all() );
6628 = binop(Iop_And32
, getIReg(4, R_EAX
), mkU32(0xFFFF00FF));
6630 = binop(Iop_Or32
, binop(Iop_And32
, mkexpr(flags
), mkU32(mask_SZACP
)),
6633 = binop(Iop_Or32
, eax_with_hole
,
6634 binop(Iop_Shl32
, new_byte
, mkU8(8)));
6635 putIReg(4, R_EAX
, new_eax
);
6640 UInt
dis_cmpxchg_G_E ( UChar sorb
,
6648 IRType ty
= szToITy(size
);
6649 IRTemp acc
= newTemp(ty
);
6650 IRTemp src
= newTemp(ty
);
6651 IRTemp dest
= newTemp(ty
);
6652 IRTemp dest2
= newTemp(ty
);
6653 IRTemp acc2
= newTemp(ty
);
6654 IRTemp cond
= newTemp(Ity_I1
);
6655 IRTemp addr
= IRTemp_INVALID
;
6656 UChar rm
= getUChar(delta0
);
6658 /* There are 3 cases to consider:
6660 reg-reg: ignore any lock prefix, generate sequence based
6663 reg-mem, not locked: ignore any lock prefix, generate sequence
6666 reg-mem, locked: use IRCAS
6668 if (epartIsReg(rm
)) {
6670 assign( dest
, getIReg(size
, eregOfRM(rm
)) );
6672 assign( src
, getIReg(size
, gregOfRM(rm
)) );
6673 assign( acc
, getIReg(size
, R_EAX
) );
6674 setFlags_DEP1_DEP2(Iop_Sub8
, acc
, dest
, ty
);
6675 assign( cond
, mk_x86g_calculate_condition(X86CondZ
) );
6676 assign( dest2
, IRExpr_ITE(mkexpr(cond
), mkexpr(src
), mkexpr(dest
)) );
6677 assign( acc2
, IRExpr_ITE(mkexpr(cond
), mkexpr(acc
), mkexpr(dest
)) );
6678 putIReg(size
, R_EAX
, mkexpr(acc2
));
6679 putIReg(size
, eregOfRM(rm
), mkexpr(dest2
));
6680 DIP("cmpxchg%c %s,%s\n", nameISize(size
),
6681 nameIReg(size
,gregOfRM(rm
)),
6682 nameIReg(size
,eregOfRM(rm
)) );
6684 else if (!epartIsReg(rm
) && !locked
) {
6686 addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
6687 assign( dest
, loadLE(ty
, mkexpr(addr
)) );
6689 assign( src
, getIReg(size
, gregOfRM(rm
)) );
6690 assign( acc
, getIReg(size
, R_EAX
) );
6691 setFlags_DEP1_DEP2(Iop_Sub8
, acc
, dest
, ty
);
6692 assign( cond
, mk_x86g_calculate_condition(X86CondZ
) );
6693 assign( dest2
, IRExpr_ITE(mkexpr(cond
), mkexpr(src
), mkexpr(dest
)) );
6694 assign( acc2
, IRExpr_ITE(mkexpr(cond
), mkexpr(acc
), mkexpr(dest
)) );
6695 putIReg(size
, R_EAX
, mkexpr(acc2
));
6696 storeLE( mkexpr(addr
), mkexpr(dest2
) );
6697 DIP("cmpxchg%c %s,%s\n", nameISize(size
),
6698 nameIReg(size
,gregOfRM(rm
)), dis_buf
);
6700 else if (!epartIsReg(rm
) && locked
) {
6702 /* src is new value. acc is expected value. dest is old value.
6703 Compute success from the output of the IRCAS, and steer the
6704 new value for EAX accordingly: in case of success, EAX is
6706 addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
6708 assign( src
, getIReg(size
, gregOfRM(rm
)) );
6709 assign( acc
, getIReg(size
, R_EAX
) );
6711 mkIRCAS( IRTemp_INVALID
, dest
, Iend_LE
, mkexpr(addr
),
6712 NULL
, mkexpr(acc
), NULL
, mkexpr(src
) )
6714 setFlags_DEP1_DEP2(Iop_Sub8
, acc
, dest
, ty
);
6715 assign( cond
, mk_x86g_calculate_condition(X86CondZ
) );
6716 assign( acc2
, IRExpr_ITE(mkexpr(cond
), mkexpr(acc
), mkexpr(dest
)) );
6717 putIReg(size
, R_EAX
, mkexpr(acc2
));
6718 DIP("cmpxchg%c %s,%s\n", nameISize(size
),
6719 nameIReg(size
,gregOfRM(rm
)), dis_buf
);
6727 /* Handle conditional move instructions of the form
6728 cmovcc E(reg-or-mem), G(reg)
6730 E(src) is reg-or-mem
6733 If E is reg, --> GET %E, tmps
6738 If E is mem --> (getAddr E) -> tmpa
6745 UInt
dis_cmov_E_G ( UChar sorb
,
6750 UChar rm
= getIByte(delta0
);
6754 IRType ty
= szToITy(sz
);
6755 IRTemp tmps
= newTemp(ty
);
6756 IRTemp tmpd
= newTemp(ty
);
6758 if (epartIsReg(rm
)) {
6759 assign( tmps
, getIReg(sz
, eregOfRM(rm
)) );
6760 assign( tmpd
, getIReg(sz
, gregOfRM(rm
)) );
6762 putIReg(sz
, gregOfRM(rm
),
6763 IRExpr_ITE( mk_x86g_calculate_condition(cond
),
6767 DIP("cmov%c%s %s,%s\n", nameISize(sz
),
6768 name_X86Condcode(cond
),
6769 nameIReg(sz
,eregOfRM(rm
)),
6770 nameIReg(sz
,gregOfRM(rm
)));
6774 /* E refers to memory */
6776 IRTemp addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
6777 assign( tmps
, loadLE(ty
, mkexpr(addr
)) );
6778 assign( tmpd
, getIReg(sz
, gregOfRM(rm
)) );
6780 putIReg(sz
, gregOfRM(rm
),
6781 IRExpr_ITE( mk_x86g_calculate_condition(cond
),
6786 DIP("cmov%c%s %s,%s\n", nameISize(sz
),
6787 name_X86Condcode(cond
),
6789 nameIReg(sz
,gregOfRM(rm
)));
6796 UInt
dis_xadd_G_E ( UChar sorb
, Bool locked
, Int sz
, Int delta0
,
6800 UChar rm
= getIByte(delta0
);
6803 IRType ty
= szToITy(sz
);
6804 IRTemp tmpd
= newTemp(ty
);
6805 IRTemp tmpt0
= newTemp(ty
);
6806 IRTemp tmpt1
= newTemp(ty
);
6808 /* There are 3 cases to consider:
6810 reg-reg: ignore any lock prefix,
6811 generate 'naive' (non-atomic) sequence
6813 reg-mem, not locked: ignore any lock prefix, generate 'naive'
6814 (non-atomic) sequence
6816 reg-mem, locked: use IRCAS
6819 if (epartIsReg(rm
)) {
6821 assign( tmpd
, getIReg(sz
, eregOfRM(rm
)));
6822 assign( tmpt0
, getIReg(sz
, gregOfRM(rm
)) );
6823 assign( tmpt1
, binop(mkSizedOp(ty
,Iop_Add8
),
6824 mkexpr(tmpd
), mkexpr(tmpt0
)) );
6825 setFlags_DEP1_DEP2( Iop_Add8
, tmpd
, tmpt0
, ty
);
6826 putIReg(sz
, eregOfRM(rm
), mkexpr(tmpt1
));
6827 putIReg(sz
, gregOfRM(rm
), mkexpr(tmpd
));
6828 DIP("xadd%c %s, %s\n",
6829 nameISize(sz
), nameIReg(sz
,gregOfRM(rm
)),
6830 nameIReg(sz
,eregOfRM(rm
)));
6834 else if (!epartIsReg(rm
) && !locked
) {
6836 IRTemp addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
6837 assign( tmpd
, loadLE(ty
, mkexpr(addr
)) );
6838 assign( tmpt0
, getIReg(sz
, gregOfRM(rm
)) );
6839 assign( tmpt1
, binop(mkSizedOp(ty
,Iop_Add8
),
6840 mkexpr(tmpd
), mkexpr(tmpt0
)) );
6841 storeLE( mkexpr(addr
), mkexpr(tmpt1
) );
6842 setFlags_DEP1_DEP2( Iop_Add8
, tmpd
, tmpt0
, ty
);
6843 putIReg(sz
, gregOfRM(rm
), mkexpr(tmpd
));
6844 DIP("xadd%c %s, %s\n",
6845 nameISize(sz
), nameIReg(sz
,gregOfRM(rm
)), dis_buf
);
6849 else if (!epartIsReg(rm
) && locked
) {
6851 IRTemp addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
6852 assign( tmpd
, loadLE(ty
, mkexpr(addr
)) );
6853 assign( tmpt0
, getIReg(sz
, gregOfRM(rm
)) );
6854 assign( tmpt1
, binop(mkSizedOp(ty
,Iop_Add8
),
6855 mkexpr(tmpd
), mkexpr(tmpt0
)) );
6856 casLE( mkexpr(addr
), mkexpr(tmpd
)/*expVal*/,
6857 mkexpr(tmpt1
)/*newVal*/, guest_EIP_curr_instr
);
6858 setFlags_DEP1_DEP2( Iop_Add8
, tmpd
, tmpt0
, ty
);
6859 putIReg(sz
, gregOfRM(rm
), mkexpr(tmpd
));
6860 DIP("xadd%c %s, %s\n",
6861 nameISize(sz
), nameIReg(sz
,gregOfRM(rm
)), dis_buf
);
6869 /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
6872 UInt
dis_mov_Ew_Sw ( UChar sorb
, Int delta0
)
6876 UChar rm
= getIByte(delta0
);
6879 if (epartIsReg(rm
)) {
6880 putSReg( gregOfRM(rm
), getIReg(2, eregOfRM(rm
)) );
6881 DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm
)), nameSReg(gregOfRM(rm
)));
6884 addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
6885 putSReg( gregOfRM(rm
), loadLE(Ity_I16
, mkexpr(addr
)) );
6886 DIP("movw %s,%s\n", dis_buf
, nameSReg(gregOfRM(rm
)));
6891 /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
6892 dst is ireg and sz==4, zero out top half of it. */
6895 UInt
dis_mov_Sw_Ew ( UChar sorb
,
6901 UChar rm
= getIByte(delta0
);
6904 vassert(sz
== 2 || sz
== 4);
6906 if (epartIsReg(rm
)) {
6908 putIReg(4, eregOfRM(rm
), unop(Iop_16Uto32
, getSReg(gregOfRM(rm
))));
6910 putIReg(2, eregOfRM(rm
), getSReg(gregOfRM(rm
)));
6912 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm
)), nameIReg(sz
,eregOfRM(rm
)));
6915 addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
6916 storeLE( mkexpr(addr
), getSReg(gregOfRM(rm
)) );
6917 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm
)), dis_buf
);
6924 void dis_push_segreg ( UInt sreg
, Int sz
)
6926 IRTemp t1
= newTemp(Ity_I16
);
6927 IRTemp ta
= newTemp(Ity_I32
);
6928 vassert(sz
== 2 || sz
== 4);
6930 assign( t1
, getSReg(sreg
) );
6931 assign( ta
, binop(Iop_Sub32
, getIReg(4, R_ESP
), mkU32(sz
)) );
6932 putIReg(4, R_ESP
, mkexpr(ta
));
6933 storeLE( mkexpr(ta
), mkexpr(t1
) );
6935 DIP("push%c %s\n", sz
==2 ? 'w' : 'l', nameSReg(sreg
));
6939 void dis_pop_segreg ( UInt sreg
, Int sz
)
6941 IRTemp t1
= newTemp(Ity_I16
);
6942 IRTemp ta
= newTemp(Ity_I32
);
6943 vassert(sz
== 2 || sz
== 4);
6945 assign( ta
, getIReg(4, R_ESP
) );
6946 assign( t1
, loadLE(Ity_I16
, mkexpr(ta
)) );
6948 putIReg(4, R_ESP
, binop(Iop_Add32
, mkexpr(ta
), mkU32(sz
)) );
6949 putSReg( sreg
, mkexpr(t1
) );
6950 DIP("pop%c %s\n", sz
==2 ? 'w' : 'l', nameSReg(sreg
));
6954 void dis_ret ( /*MOD*/DisResult
* dres
, UInt d32
)
6956 IRTemp t1
= newTemp(Ity_I32
);
6957 IRTemp t2
= newTemp(Ity_I32
);
6958 assign(t1
, getIReg(4,R_ESP
));
6959 assign(t2
, loadLE(Ity_I32
,mkexpr(t1
)));
6960 putIReg(4, R_ESP
,binop(Iop_Add32
, mkexpr(t1
), mkU32(4+d32
)));
6961 jmp_treg(dres
, Ijk_Ret
, t2
);
6962 vassert(dres
->whatNext
== Dis_StopHere
);
6965 /*------------------------------------------------------------*/
6966 /*--- SSE/SSE2/SSE3 helpers ---*/
6967 /*------------------------------------------------------------*/
6969 /* Indicates whether the op requires a rounding-mode argument. Note
6970 that this covers only vector floating point arithmetic ops, and
6971 omits the scalar ones that need rounding modes. Note also that
6972 inconsistencies here will get picked up later by the IR sanity
6973 checker, so this isn't correctness-critical. */
6974 static Bool
requiresRMode ( IROp op
)
6978 case Iop_Add32Fx4
: case Iop_Sub32Fx4
:
6979 case Iop_Mul32Fx4
: case Iop_Div32Fx4
:
6980 case Iop_Add64Fx2
: case Iop_Sub64Fx2
:
6981 case Iop_Mul64Fx2
: case Iop_Div64Fx2
:
6990 /* Worker function; do not call directly.
6991 Handles full width G = G `op` E and G = (not G) `op` E.
6994 static UInt
dis_SSE_E_to_G_all_wrk (
6995 UChar sorb
, Int delta
,
6996 const HChar
* opname
, IROp op
,
7003 UChar rm
= getIByte(delta
);
7005 = invertG
? unop(Iop_NotV128
, getXMMReg(gregOfRM(rm
)))
7006 : getXMMReg(gregOfRM(rm
));
7007 if (epartIsReg(rm
)) {
7011 ? triop(op
, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
7013 getXMMReg(eregOfRM(rm
)))
7015 getXMMReg(eregOfRM(rm
)))
7017 DIP("%s %s,%s\n", opname
,
7018 nameXMMReg(eregOfRM(rm
)),
7019 nameXMMReg(gregOfRM(rm
)) );
7022 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7026 ? triop(op
, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
7028 loadLE(Ity_V128
, mkexpr(addr
)))
7030 loadLE(Ity_V128
, mkexpr(addr
)))
7032 DIP("%s %s,%s\n", opname
,
7034 nameXMMReg(gregOfRM(rm
)) );
7040 /* All lanes SSE binary operation, G = G `op` E. */
7043 UInt
dis_SSE_E_to_G_all ( UChar sorb
, Int delta
, const HChar
* opname
, IROp op
)
7045 return dis_SSE_E_to_G_all_wrk( sorb
, delta
, opname
, op
, False
);
7048 /* All lanes SSE binary operation, G = (not G) `op` E. */
7051 UInt
dis_SSE_E_to_G_all_invG ( UChar sorb
, Int delta
,
7052 const HChar
* opname
, IROp op
)
7054 return dis_SSE_E_to_G_all_wrk( sorb
, delta
, opname
, op
, True
);
7058 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
7060 static UInt
dis_SSE_E_to_G_lo32 ( UChar sorb
, Int delta
,
7061 const HChar
* opname
, IROp op
)
7066 UChar rm
= getIByte(delta
);
7067 IRExpr
* gpart
= getXMMReg(gregOfRM(rm
));
7068 if (epartIsReg(rm
)) {
7069 putXMMReg( gregOfRM(rm
),
7071 getXMMReg(eregOfRM(rm
))) );
7072 DIP("%s %s,%s\n", opname
,
7073 nameXMMReg(eregOfRM(rm
)),
7074 nameXMMReg(gregOfRM(rm
)) );
7077 /* We can only do a 32-bit memory read, so the upper 3/4 of the
7078 E operand needs to be made simply of zeroes. */
7079 IRTemp epart
= newTemp(Ity_V128
);
7080 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7081 assign( epart
, unop( Iop_32UtoV128
,
7082 loadLE(Ity_I32
, mkexpr(addr
))) );
7083 putXMMReg( gregOfRM(rm
),
7084 binop(op
, gpart
, mkexpr(epart
)) );
7085 DIP("%s %s,%s\n", opname
,
7087 nameXMMReg(gregOfRM(rm
)) );
7093 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
7095 static UInt
dis_SSE_E_to_G_lo64 ( UChar sorb
, Int delta
,
7096 const HChar
* opname
, IROp op
)
7101 UChar rm
= getIByte(delta
);
7102 IRExpr
* gpart
= getXMMReg(gregOfRM(rm
));
7103 if (epartIsReg(rm
)) {
7104 putXMMReg( gregOfRM(rm
),
7106 getXMMReg(eregOfRM(rm
))) );
7107 DIP("%s %s,%s\n", opname
,
7108 nameXMMReg(eregOfRM(rm
)),
7109 nameXMMReg(gregOfRM(rm
)) );
7112 /* We can only do a 64-bit memory read, so the upper half of the
7113 E operand needs to be made simply of zeroes. */
7114 IRTemp epart
= newTemp(Ity_V128
);
7115 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7116 assign( epart
, unop( Iop_64UtoV128
,
7117 loadLE(Ity_I64
, mkexpr(addr
))) );
7118 putXMMReg( gregOfRM(rm
),
7119 binop(op
, gpart
, mkexpr(epart
)) );
7120 DIP("%s %s,%s\n", opname
,
7122 nameXMMReg(gregOfRM(rm
)) );
7128 /* All lanes unary SSE operation, G = op(E). */
7130 static UInt
dis_SSE_E_to_G_unary_all (
7131 UChar sorb
, Int delta
,
7132 const HChar
* opname
, IROp op
7138 UChar rm
= getIByte(delta
);
7139 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
7140 // up in the usual way.
7141 Bool needsIRRM
= op
== Iop_Sqrt32Fx4
|| op
== Iop_Sqrt64Fx2
;
7142 if (epartIsReg(rm
)) {
7143 IRExpr
* src
= getXMMReg(eregOfRM(rm
));
7144 /* XXXROUNDINGFIXME */
7145 IRExpr
* res
= needsIRRM
? binop(op
, get_FAKE_roundingmode(), src
)
7147 putXMMReg( gregOfRM(rm
), res
);
7148 DIP("%s %s,%s\n", opname
,
7149 nameXMMReg(eregOfRM(rm
)),
7150 nameXMMReg(gregOfRM(rm
)) );
7153 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7154 IRExpr
* src
= loadLE(Ity_V128
, mkexpr(addr
));
7155 /* XXXROUNDINGFIXME */
7156 IRExpr
* res
= needsIRRM
? binop(op
, get_FAKE_roundingmode(), src
)
7158 putXMMReg( gregOfRM(rm
), res
);
7159 DIP("%s %s,%s\n", opname
,
7161 nameXMMReg(gregOfRM(rm
)) );
7167 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */
7169 static UInt
dis_SSE_E_to_G_unary_lo32 (
7170 UChar sorb
, Int delta
,
7171 const HChar
* opname
, IROp op
7174 /* First we need to get the old G value and patch the low 32 bits
7175 of the E operand into it. Then apply op and write back to G. */
7179 UChar rm
= getIByte(delta
);
7180 IRTemp oldG0
= newTemp(Ity_V128
);
7181 IRTemp oldG1
= newTemp(Ity_V128
);
7183 assign( oldG0
, getXMMReg(gregOfRM(rm
)) );
7185 if (epartIsReg(rm
)) {
7187 binop( Iop_SetV128lo32
,
7189 getXMMRegLane32(eregOfRM(rm
), 0)) );
7190 putXMMReg( gregOfRM(rm
), unop(op
, mkexpr(oldG1
)) );
7191 DIP("%s %s,%s\n", opname
,
7192 nameXMMReg(eregOfRM(rm
)),
7193 nameXMMReg(gregOfRM(rm
)) );
7196 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7198 binop( Iop_SetV128lo32
,
7200 loadLE(Ity_I32
, mkexpr(addr
)) ));
7201 putXMMReg( gregOfRM(rm
), unop(op
, mkexpr(oldG1
)) );
7202 DIP("%s %s,%s\n", opname
,
7204 nameXMMReg(gregOfRM(rm
)) );
7210 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */
7212 static UInt
dis_SSE_E_to_G_unary_lo64 (
7213 UChar sorb
, Int delta
,
7214 const HChar
* opname
, IROp op
7217 /* First we need to get the old G value and patch the low 64 bits
7218 of the E operand into it. Then apply op and write back to G. */
7222 UChar rm
= getIByte(delta
);
7223 IRTemp oldG0
= newTemp(Ity_V128
);
7224 IRTemp oldG1
= newTemp(Ity_V128
);
7226 assign( oldG0
, getXMMReg(gregOfRM(rm
)) );
7228 if (epartIsReg(rm
)) {
7230 binop( Iop_SetV128lo64
,
7232 getXMMRegLane64(eregOfRM(rm
), 0)) );
7233 putXMMReg( gregOfRM(rm
), unop(op
, mkexpr(oldG1
)) );
7234 DIP("%s %s,%s\n", opname
,
7235 nameXMMReg(eregOfRM(rm
)),
7236 nameXMMReg(gregOfRM(rm
)) );
7239 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7241 binop( Iop_SetV128lo64
,
7243 loadLE(Ity_I64
, mkexpr(addr
)) ));
7244 putXMMReg( gregOfRM(rm
), unop(op
, mkexpr(oldG1
)) );
7245 DIP("%s %s,%s\n", opname
,
7247 nameXMMReg(gregOfRM(rm
)) );
7253 /* SSE integer binary operation:
7254 G = G `op` E (eLeft == False)
7255 G = E `op` G (eLeft == True)
7257 static UInt
dis_SSEint_E_to_G(
7258 UChar sorb
, Int delta
,
7259 const HChar
* opname
, IROp op
,
7266 UChar rm
= getIByte(delta
);
7267 IRExpr
* gpart
= getXMMReg(gregOfRM(rm
));
7268 IRExpr
* epart
= NULL
;
7269 if (epartIsReg(rm
)) {
7270 epart
= getXMMReg(eregOfRM(rm
));
7271 DIP("%s %s,%s\n", opname
,
7272 nameXMMReg(eregOfRM(rm
)),
7273 nameXMMReg(gregOfRM(rm
)) );
7276 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7277 epart
= loadLE(Ity_V128
, mkexpr(addr
));
7278 DIP("%s %s,%s\n", opname
,
7280 nameXMMReg(gregOfRM(rm
)) );
7283 putXMMReg( gregOfRM(rm
),
7284 eLeft
? binop(op
, epart
, gpart
)
7285 : binop(op
, gpart
, epart
) );
7290 /* Helper for doing SSE FP comparisons. */
7292 static void findSSECmpOp ( Bool
* needNot
, IROp
* op
,
7293 Int imm8
, Bool all_lanes
, Int sz
)
7303 if (sz
== 4 && all_lanes
) {
7305 case 0: *op
= Iop_CmpEQ32Fx4
; return;
7306 case 1: *op
= Iop_CmpLT32Fx4
; return;
7307 case 2: *op
= Iop_CmpLE32Fx4
; return;
7308 case 3: *op
= Iop_CmpUN32Fx4
; return;
7312 if (sz
== 4 && !all_lanes
) {
7314 case 0: *op
= Iop_CmpEQ32F0x4
; return;
7315 case 1: *op
= Iop_CmpLT32F0x4
; return;
7316 case 2: *op
= Iop_CmpLE32F0x4
; return;
7317 case 3: *op
= Iop_CmpUN32F0x4
; return;
7321 if (sz
== 8 && all_lanes
) {
7323 case 0: *op
= Iop_CmpEQ64Fx2
; return;
7324 case 1: *op
= Iop_CmpLT64Fx2
; return;
7325 case 2: *op
= Iop_CmpLE64Fx2
; return;
7326 case 3: *op
= Iop_CmpUN64Fx2
; return;
7330 if (sz
== 8 && !all_lanes
) {
7332 case 0: *op
= Iop_CmpEQ64F0x2
; return;
7333 case 1: *op
= Iop_CmpLT64F0x2
; return;
7334 case 2: *op
= Iop_CmpLE64F0x2
; return;
7335 case 3: *op
= Iop_CmpUN64F0x2
; return;
7339 vpanic("findSSECmpOp(x86,guest)");
7342 /* Handles SSE 32F/64F comparisons. */
7344 static UInt
dis_SSEcmp_E_to_G ( UChar sorb
, Int delta
,
7345 const HChar
* opname
, Bool all_lanes
, Int sz
)
7350 Bool needNot
= False
;
7351 IROp op
= Iop_INVALID
;
7352 IRTemp plain
= newTemp(Ity_V128
);
7353 UChar rm
= getIByte(delta
);
7355 vassert(sz
== 4 || sz
== 8);
7356 if (epartIsReg(rm
)) {
7357 imm8
= getIByte(delta
+1);
7358 findSSECmpOp(&needNot
, &op
, imm8
, all_lanes
, sz
);
7359 assign( plain
, binop(op
, getXMMReg(gregOfRM(rm
)),
7360 getXMMReg(eregOfRM(rm
))) );
7362 DIP("%s $%d,%s,%s\n", opname
,
7364 nameXMMReg(eregOfRM(rm
)),
7365 nameXMMReg(gregOfRM(rm
)) );
7367 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7368 imm8
= getIByte(delta
+alen
);
7369 findSSECmpOp(&needNot
, &op
, imm8
, all_lanes
, sz
);
7373 getXMMReg(gregOfRM(rm
)),
7374 all_lanes
? loadLE(Ity_V128
, mkexpr(addr
))
7375 : sz
== 8 ? unop( Iop_64UtoV128
, loadLE(Ity_I64
, mkexpr(addr
)))
7376 : /*sz==4*/ unop( Iop_32UtoV128
, loadLE(Ity_I32
, mkexpr(addr
)))
7380 DIP("%s $%d,%s,%s\n", opname
,
7383 nameXMMReg(gregOfRM(rm
)) );
7386 if (needNot
&& all_lanes
) {
7387 putXMMReg( gregOfRM(rm
),
7388 unop(Iop_NotV128
, mkexpr(plain
)) );
7391 if (needNot
&& !all_lanes
) {
7392 mask
= toUShort( sz
==4 ? 0x000F : 0x00FF );
7393 putXMMReg( gregOfRM(rm
),
7394 binop(Iop_XorV128
, mkexpr(plain
), mkV128(mask
)) );
7397 putXMMReg( gregOfRM(rm
), mkexpr(plain
) );
7404 /* Vector by scalar shift of G by the amount specified at the bottom
7407 static UInt
dis_SSE_shiftG_byE ( UChar sorb
, Int delta
,
7408 const HChar
* opname
, IROp op
)
7414 UChar rm
= getIByte(delta
);
7415 IRTemp g0
= newTemp(Ity_V128
);
7416 IRTemp g1
= newTemp(Ity_V128
);
7417 IRTemp amt
= newTemp(Ity_I32
);
7418 IRTemp amt8
= newTemp(Ity_I8
);
7419 if (epartIsReg(rm
)) {
7420 assign( amt
, getXMMRegLane32(eregOfRM(rm
), 0) );
7421 DIP("%s %s,%s\n", opname
,
7422 nameXMMReg(eregOfRM(rm
)),
7423 nameXMMReg(gregOfRM(rm
)) );
7426 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7427 assign( amt
, loadLE(Ity_I32
, mkexpr(addr
)) );
7428 DIP("%s %s,%s\n", opname
,
7430 nameXMMReg(gregOfRM(rm
)) );
7433 assign( g0
, getXMMReg(gregOfRM(rm
)) );
7434 assign( amt8
, unop(Iop_32to8
, mkexpr(amt
)) );
7436 shl
= shr
= sar
= False
;
7439 case Iop_ShlN16x8
: shl
= True
; size
= 32; break;
7440 case Iop_ShlN32x4
: shl
= True
; size
= 32; break;
7441 case Iop_ShlN64x2
: shl
= True
; size
= 64; break;
7442 case Iop_SarN16x8
: sar
= True
; size
= 16; break;
7443 case Iop_SarN32x4
: sar
= True
; size
= 32; break;
7444 case Iop_ShrN16x8
: shr
= True
; size
= 16; break;
7445 case Iop_ShrN32x4
: shr
= True
; size
= 32; break;
7446 case Iop_ShrN64x2
: shr
= True
; size
= 64; break;
7447 default: vassert(0);
7454 binop(Iop_CmpLT32U
,mkexpr(amt
),mkU32(size
)),
7455 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
7464 binop(Iop_CmpLT32U
,mkexpr(amt
),mkU32(size
)),
7465 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
7466 binop(op
, mkexpr(g0
), mkU8(size
-1))
7474 putXMMReg( gregOfRM(rm
), mkexpr(g1
) );
7479 /* Vector by scalar shift of E by an immediate byte. */
7482 UInt
dis_SSE_shiftE_imm ( Int delta
, const HChar
* opname
, IROp op
)
7485 UChar rm
= getIByte(delta
);
7486 IRTemp e0
= newTemp(Ity_V128
);
7487 IRTemp e1
= newTemp(Ity_V128
);
7489 vassert(epartIsReg(rm
));
7490 vassert(gregOfRM(rm
) == 2
7491 || gregOfRM(rm
) == 4 || gregOfRM(rm
) == 6);
7492 amt
= getIByte(delta
+1);
7494 DIP("%s $%d,%s\n", opname
,
7496 nameXMMReg(eregOfRM(rm
)) );
7497 assign( e0
, getXMMReg(eregOfRM(rm
)) );
7499 shl
= shr
= sar
= False
;
7502 case Iop_ShlN16x8
: shl
= True
; size
= 16; break;
7503 case Iop_ShlN32x4
: shl
= True
; size
= 32; break;
7504 case Iop_ShlN64x2
: shl
= True
; size
= 64; break;
7505 case Iop_SarN16x8
: sar
= True
; size
= 16; break;
7506 case Iop_SarN32x4
: sar
= True
; size
= 32; break;
7507 case Iop_ShrN16x8
: shr
= True
; size
= 16; break;
7508 case Iop_ShrN32x4
: shr
= True
; size
= 32; break;
7509 case Iop_ShrN64x2
: shr
= True
; size
= 64; break;
7510 default: vassert(0);
7514 assign( e1
, amt
>= size
7516 : binop(op
, mkexpr(e0
), mkU8(amt
))
7520 assign( e1
, amt
>= size
7521 ? binop(op
, mkexpr(e0
), mkU8(size
-1))
7522 : binop(op
, mkexpr(e0
), mkU8(amt
))
7529 putXMMReg( eregOfRM(rm
), mkexpr(e1
) );
7534 /* Get the current SSE rounding mode. */
7536 static IRExpr
* /* :: Ity_I32 */ get_sse_roundingmode ( void )
7538 return binop( Iop_And32
,
7539 IRExpr_Get( OFFB_SSEROUND
, Ity_I32
),
7543 static void put_sse_roundingmode ( IRExpr
* sseround
)
7545 vassert(typeOfIRExpr(irsb
->tyenv
, sseround
) == Ity_I32
);
7546 stmt( IRStmt_Put( OFFB_SSEROUND
, sseround
) );
7549 /* Break a 128-bit value up into four 32-bit ints. */
7551 static void breakup128to32s ( IRTemp t128
,
7553 IRTemp
* t3
, IRTemp
* t2
,
7554 IRTemp
* t1
, IRTemp
* t0
)
7556 IRTemp hi64
= newTemp(Ity_I64
);
7557 IRTemp lo64
= newTemp(Ity_I64
);
7558 assign( hi64
, unop(Iop_V128HIto64
, mkexpr(t128
)) );
7559 assign( lo64
, unop(Iop_V128to64
, mkexpr(t128
)) );
7561 vassert(t0
&& *t0
== IRTemp_INVALID
);
7562 vassert(t1
&& *t1
== IRTemp_INVALID
);
7563 vassert(t2
&& *t2
== IRTemp_INVALID
);
7564 vassert(t3
&& *t3
== IRTemp_INVALID
);
7566 *t0
= newTemp(Ity_I32
);
7567 *t1
= newTemp(Ity_I32
);
7568 *t2
= newTemp(Ity_I32
);
7569 *t3
= newTemp(Ity_I32
);
7570 assign( *t0
, unop(Iop_64to32
, mkexpr(lo64
)) );
7571 assign( *t1
, unop(Iop_64HIto32
, mkexpr(lo64
)) );
7572 assign( *t2
, unop(Iop_64to32
, mkexpr(hi64
)) );
7573 assign( *t3
, unop(Iop_64HIto32
, mkexpr(hi64
)) );
7576 /* Construct a 128-bit value from four 32-bit ints. */
7578 static IRExpr
* mk128from32s ( IRTemp t3
, IRTemp t2
,
7579 IRTemp t1
, IRTemp t0
)
7582 binop( Iop_64HLtoV128
,
7583 binop(Iop_32HLto64
, mkexpr(t3
), mkexpr(t2
)),
7584 binop(Iop_32HLto64
, mkexpr(t1
), mkexpr(t0
))
7588 /* Break a 64-bit value up into four 16-bit ints. */
7590 static void breakup64to16s ( IRTemp t64
,
7592 IRTemp
* t3
, IRTemp
* t2
,
7593 IRTemp
* t1
, IRTemp
* t0
)
7595 IRTemp hi32
= newTemp(Ity_I32
);
7596 IRTemp lo32
= newTemp(Ity_I32
);
7597 assign( hi32
, unop(Iop_64HIto32
, mkexpr(t64
)) );
7598 assign( lo32
, unop(Iop_64to32
, mkexpr(t64
)) );
7600 vassert(t0
&& *t0
== IRTemp_INVALID
);
7601 vassert(t1
&& *t1
== IRTemp_INVALID
);
7602 vassert(t2
&& *t2
== IRTemp_INVALID
);
7603 vassert(t3
&& *t3
== IRTemp_INVALID
);
7605 *t0
= newTemp(Ity_I16
);
7606 *t1
= newTemp(Ity_I16
);
7607 *t2
= newTemp(Ity_I16
);
7608 *t3
= newTemp(Ity_I16
);
7609 assign( *t0
, unop(Iop_32to16
, mkexpr(lo32
)) );
7610 assign( *t1
, unop(Iop_32HIto16
, mkexpr(lo32
)) );
7611 assign( *t2
, unop(Iop_32to16
, mkexpr(hi32
)) );
7612 assign( *t3
, unop(Iop_32HIto16
, mkexpr(hi32
)) );
7615 /* Construct a 64-bit value from four 16-bit ints. */
7617 static IRExpr
* mk64from16s ( IRTemp t3
, IRTemp t2
,
7618 IRTemp t1
, IRTemp t0
)
7621 binop( Iop_32HLto64
,
7622 binop(Iop_16HLto32
, mkexpr(t3
), mkexpr(t2
)),
7623 binop(Iop_16HLto32
, mkexpr(t1
), mkexpr(t0
))
7627 /* Generate IR to set the guest %EFLAGS from the pushfl-format image
7628 in the given 32-bit temporary. The flags that are set are: O S Z A
7631 In all cases, code to set AC is generated. However, VEX actually
7632 ignores the AC value and so can optionally emit an emulation
7633 warning when it is enabled. In this routine, an emulation warning
7634 is only emitted if emit_AC_emwarn is True, in which case
7635 next_insn_EIP must be correct (this allows for correct code
7636 generation for popfl/popfw). If emit_AC_emwarn is False,
7637 next_insn_EIP is unimportant (this allows for easy if kludgey code
7638 generation for IRET.) */
7641 void set_EFLAGS_from_value ( IRTemp t1
,
7642 Bool emit_AC_emwarn
,
7643 Addr32 next_insn_EIP
)
7645 vassert(typeOfIRTemp(irsb
->tyenv
,t1
) == Ity_I32
);
7647 /* t1 is the flag word. Mask out everything except OSZACP and set
7648 the flags thunk to X86G_CC_OP_COPY. */
7649 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
7650 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
7651 stmt( IRStmt_Put( OFFB_CC_DEP1
,
7654 mkU32( X86G_CC_MASK_C
| X86G_CC_MASK_P
7655 | X86G_CC_MASK_A
| X86G_CC_MASK_Z
7656 | X86G_CC_MASK_S
| X86G_CC_MASK_O
)
7660 /* Set NDEP even though it isn't used. This makes redundant-PUT
7661 elimination of previous stores to this field work better. */
7662 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
7664 /* Also need to set the D flag, which is held in bit 10 of t1.
7665 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
7671 binop(Iop_Shr32
, mkexpr(t1
), mkU8(10)),
7677 /* Set the ID flag */
7683 binop(Iop_Shr32
, mkexpr(t1
), mkU8(21)),
7689 /* And set the AC flag. If setting it 1 to, possibly emit an
7690 emulation warning. */
7696 binop(Iop_Shr32
, mkexpr(t1
), mkU8(18)),
7702 if (emit_AC_emwarn
) {
7703 put_emwarn( mkU32(EmWarn_X86_acFlag
) );
7707 binop(Iop_And32
, mkexpr(t1
), mkU32(1<<18)),
7710 IRConst_U32( next_insn_EIP
),
7718 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
7719 values (aa,bb), computes, for each of the 4 16-bit lanes:
7721 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
7723 static IRExpr
* dis_PMULHRSW_helper ( IRExpr
* aax
, IRExpr
* bbx
)
7725 IRTemp aa
= newTemp(Ity_I64
);
7726 IRTemp bb
= newTemp(Ity_I64
);
7727 IRTemp aahi32s
= newTemp(Ity_I64
);
7728 IRTemp aalo32s
= newTemp(Ity_I64
);
7729 IRTemp bbhi32s
= newTemp(Ity_I64
);
7730 IRTemp bblo32s
= newTemp(Ity_I64
);
7731 IRTemp rHi
= newTemp(Ity_I64
);
7732 IRTemp rLo
= newTemp(Ity_I64
);
7733 IRTemp one32x2
= newTemp(Ity_I64
);
7738 binop(Iop_InterleaveHI16x4
, mkexpr(aa
), mkexpr(aa
)),
7742 binop(Iop_InterleaveLO16x4
, mkexpr(aa
), mkexpr(aa
)),
7746 binop(Iop_InterleaveHI16x4
, mkexpr(bb
), mkexpr(bb
)),
7750 binop(Iop_InterleaveLO16x4
, mkexpr(bb
), mkexpr(bb
)),
7752 assign(one32x2
, mkU64( (1ULL << 32) + 1 ));
7761 binop(Iop_Mul32x2
, mkexpr(aahi32s
), mkexpr(bbhi32s
)),
7777 binop(Iop_Mul32x2
, mkexpr(aalo32s
), mkexpr(bblo32s
)),
7786 binop(Iop_CatEvenLanes16x4
, mkexpr(rHi
), mkexpr(rLo
));
7789 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
7790 values (aa,bb), computes, for each lane:
7792 if aa_lane < 0 then - bb_lane
7793 else if aa_lane > 0 then bb_lane
7796 static IRExpr
* dis_PSIGN_helper ( IRExpr
* aax
, IRExpr
* bbx
, Int laneszB
)
7798 IRTemp aa
= newTemp(Ity_I64
);
7799 IRTemp bb
= newTemp(Ity_I64
);
7800 IRTemp zero
= newTemp(Ity_I64
);
7801 IRTemp bbNeg
= newTemp(Ity_I64
);
7802 IRTemp negMask
= newTemp(Ity_I64
);
7803 IRTemp posMask
= newTemp(Ity_I64
);
7804 IROp opSub
= Iop_INVALID
;
7805 IROp opCmpGTS
= Iop_INVALID
;
7808 case 1: opSub
= Iop_Sub8x8
; opCmpGTS
= Iop_CmpGT8Sx8
; break;
7809 case 2: opSub
= Iop_Sub16x4
; opCmpGTS
= Iop_CmpGT16Sx4
; break;
7810 case 4: opSub
= Iop_Sub32x2
; opCmpGTS
= Iop_CmpGT32Sx2
; break;
7811 default: vassert(0);
7816 assign( zero
, mkU64(0) );
7817 assign( bbNeg
, binop(opSub
, mkexpr(zero
), mkexpr(bb
)) );
7818 assign( negMask
, binop(opCmpGTS
, mkexpr(zero
), mkexpr(aa
)) );
7819 assign( posMask
, binop(opCmpGTS
, mkexpr(aa
), mkexpr(zero
)) );
7823 binop(Iop_And64
, mkexpr(bb
), mkexpr(posMask
)),
7824 binop(Iop_And64
, mkexpr(bbNeg
), mkexpr(negMask
)) );
7828 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
7829 value aa, computes, for each lane
7831 if aa < 0 then -aa else aa
7833 Note that the result is interpreted as unsigned, so that the
7834 absolute value of the most negative signed input can be
7837 static IRExpr
* dis_PABS_helper ( IRExpr
* aax
, Int laneszB
)
7839 IRTemp aa
= newTemp(Ity_I64
);
7840 IRTemp zero
= newTemp(Ity_I64
);
7841 IRTemp aaNeg
= newTemp(Ity_I64
);
7842 IRTemp negMask
= newTemp(Ity_I64
);
7843 IRTemp posMask
= newTemp(Ity_I64
);
7844 IROp opSub
= Iop_INVALID
;
7845 IROp opSarN
= Iop_INVALID
;
7848 case 1: opSub
= Iop_Sub8x8
; opSarN
= Iop_SarN8x8
; break;
7849 case 2: opSub
= Iop_Sub16x4
; opSarN
= Iop_SarN16x4
; break;
7850 case 4: opSub
= Iop_Sub32x2
; opSarN
= Iop_SarN32x2
; break;
7851 default: vassert(0);
7855 assign( negMask
, binop(opSarN
, mkexpr(aa
), mkU8(8*laneszB
-1)) );
7856 assign( posMask
, unop(Iop_Not64
, mkexpr(negMask
)) );
7857 assign( zero
, mkU64(0) );
7858 assign( aaNeg
, binop(opSub
, mkexpr(zero
), mkexpr(aa
)) );
7861 binop(Iop_And64
, mkexpr(aa
), mkexpr(posMask
)),
7862 binop(Iop_And64
, mkexpr(aaNeg
), mkexpr(negMask
)) );
7865 static IRExpr
* dis_PALIGNR_XMM_helper ( IRTemp hi64
,
7866 IRTemp lo64
, Int byteShift
)
7868 vassert(byteShift
>= 1 && byteShift
<= 7);
7871 binop(Iop_Shl64
, mkexpr(hi64
), mkU8(8*(8-byteShift
))),
7872 binop(Iop_Shr64
, mkexpr(lo64
), mkU8(8*byteShift
))
7876 /* Generate a SIGSEGV followed by a restart of the current instruction
7877 if effective_addr is not 16-aligned. This is required behaviour
7878 for some SSE3 instructions and all 128-bit SSSE3 instructions.
7879 This assumes that guest_RIP_curr_instr is set correctly! */
7880 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr
)
7885 binop(Iop_And32
,mkexpr(effective_addr
),mkU32(0xF)),
7888 IRConst_U32(guest_EIP_curr_instr
),
7895 /* Helper for deciding whether a given insn (starting at the opcode
7896 byte) may validly be used with a LOCK prefix. The following insns
7897 may be used with LOCK when their destination operand is in memory.
7898 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
7900 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
7901 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
7902 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
7903 SBB 81 /3, 81 /3, 82 /x, 83 /3, 18, 19
7904 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
7905 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
7906 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
7920 CMPXCHG 0F B0, 0F B1
7925 ------------------------------
7927 80 /0 = addb $imm8, rm8
7928 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
7929 82 /0 = addb $imm8, rm8
7930 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
7933 01 = addl r32, rm32 and addw r16, rm16
7935 Same for ADD OR ADC SBB AND SUB XOR
7938 FF /1 = dec rm32 and dec rm16
7941 FF /0 = inc rm32 and inc rm16
7944 F7 /3 = neg rm32 and neg rm16
7947 F7 /2 = not rm32 and not rm16
7949 0F BB = btcw r16, rm16 and btcl r32, rm32
7950 OF BA /7 = btcw $imm8, rm16 and btcw $imm8, rm32
7954 static Bool
can_be_used_with_LOCK_prefix ( const UChar
* opc
)
7957 case 0x00: case 0x01: case 0x08: case 0x09:
7958 case 0x10: case 0x11: case 0x18: case 0x19:
7959 case 0x20: case 0x21: case 0x28: case 0x29:
7960 case 0x30: case 0x31:
7961 if (!epartIsReg(opc
[1]))
7965 case 0x80: case 0x81: case 0x82: case 0x83:
7966 if (gregOfRM(opc
[1]) >= 0 && gregOfRM(opc
[1]) <= 6
7967 && !epartIsReg(opc
[1]))
7971 case 0xFE: case 0xFF:
7972 if (gregOfRM(opc
[1]) >= 0 && gregOfRM(opc
[1]) <= 1
7973 && !epartIsReg(opc
[1]))
7977 case 0xF6: case 0xF7:
7978 if (gregOfRM(opc
[1]) >= 2 && gregOfRM(opc
[1]) <= 3
7979 && !epartIsReg(opc
[1]))
7983 case 0x86: case 0x87:
7984 if (!epartIsReg(opc
[1]))
7990 case 0xBB: case 0xB3: case 0xAB:
7991 if (!epartIsReg(opc
[2]))
7995 if (gregOfRM(opc
[2]) >= 5 && gregOfRM(opc
[2]) <= 7
7996 && !epartIsReg(opc
[2]))
7999 case 0xB0: case 0xB1:
8000 if (!epartIsReg(opc
[2]))
8004 if (gregOfRM(opc
[2]) == 1 && !epartIsReg(opc
[2]) )
8007 case 0xC0: case 0xC1:
8008 if (!epartIsReg(opc
[2]))
8013 } /* switch (opc[1]) */
8019 } /* switch (opc[0]) */
8024 static IRTemp
math_BSWAP ( IRTemp t1
, IRType ty
)
8026 IRTemp t2
= newTemp(ty
);
8027 if (ty
== Ity_I32
) {
8031 binop(Iop_Shl32
, mkexpr(t1
), mkU8(24)),
8034 binop(Iop_And32
, binop(Iop_Shl32
, mkexpr(t1
), mkU8(8)),
8037 binop(Iop_And32
, binop(Iop_Shr32
, mkexpr(t1
), mkU8(8)),
8039 binop(Iop_And32
, binop(Iop_Shr32
, mkexpr(t1
), mkU8(24)),
8045 if (ty
== Ity_I16
) {
8048 binop(Iop_Shl16
, mkexpr(t1
), mkU8(8)),
8049 binop(Iop_Shr16
, mkexpr(t1
), mkU8(8)) ));
8054 return IRTemp_INVALID
;
8057 /*------------------------------------------------------------*/
8058 /*--- Disassemble a single instruction ---*/
8059 /*------------------------------------------------------------*/
8061 /* Disassemble a single instruction into IR. The instruction is
8062 located in host memory at &guest_code[delta]. *expect_CAS is set
8063 to True if the resulting IR is expected to contain an IRCAS
8064 statement, and False if it's not expected to. This makes it
8065 possible for the caller of disInstr_X86_WRK to check that
8066 LOCK-prefixed instructions are at least plausibly translated, in
8067 that it becomes possible to check that a (validly) LOCK-prefixed
8068 instruction generates a translation containing an IRCAS, and
8069 instructions without LOCK prefixes don't generate translations
8070 containing an IRCAS.
8073 DisResult
disInstr_X86_WRK (
8074 /*OUT*/Bool
* expect_CAS
,
8075 Bool (*resteerOkFn
) ( /*opaque*/void*, Addr
),
8077 void* callback_opaque
,
8079 const VexArchInfo
* archinfo
,
8080 const VexAbiInfo
* vbi
,
8085 IRTemp addr
, t0
, t1
, t2
, t3
, t4
, t5
, t6
;
8087 UChar opc
, modrm
, abyte
, pre
;
8090 Int am_sz
, d_sz
, n_prefixes
;
8092 const UChar
* insn
; /* used in SSE decoders */
8094 /* The running delta */
8095 Int delta
= (Int
)delta64
;
8097 /* Holds eip at the start of the insn, so that we can print
8098 consistent error messages for unimplemented insns. */
8099 Int delta_start
= delta
;
8101 /* sz denotes the nominal data-op size of the insn; we change it to
8102 2 if an 0x66 prefix is seen */
8105 /* sorb holds the segment-override-prefix byte, if any. Zero if no
8106 prefix has been seen, else one of {0x26, 0x36, 0x3E, 0x64, 0x65}
8107 indicating the prefix. */
8110 /* Gets set to True if a LOCK prefix is seen. */
8111 Bool pfx_lock
= False
;
8113 /* Set result defaults. */
8114 dres
.whatNext
= Dis_Continue
;
8116 dres
.continueAt
= 0;
8117 dres
.hint
= Dis_HintNone
;
8118 dres
.jk_StopHere
= Ijk_INVALID
;
8120 *expect_CAS
= False
;
8122 addr
= t0
= t1
= t2
= t3
= t4
= t5
= t6
= IRTemp_INVALID
;
8124 vassert(guest_EIP_bbstart
+ delta
== guest_EIP_curr_instr
);
8125 DIP("\t0x%x: ", guest_EIP_bbstart
+delta
);
8127 /* Spot "Special" instructions (see comment at top of file). */
8129 const UChar
* code
= guest_code
+ delta
;
8130 /* Spot the 12-byte preamble:
8131 C1C703 roll $3, %edi
8132 C1C70D roll $13, %edi
8133 C1C71D roll $29, %edi
8134 C1C713 roll $19, %edi
8136 if (code
[ 0] == 0xC1 && code
[ 1] == 0xC7 && code
[ 2] == 0x03 &&
8137 code
[ 3] == 0xC1 && code
[ 4] == 0xC7 && code
[ 5] == 0x0D &&
8138 code
[ 6] == 0xC1 && code
[ 7] == 0xC7 && code
[ 8] == 0x1D &&
8139 code
[ 9] == 0xC1 && code
[10] == 0xC7 && code
[11] == 0x13) {
8140 /* Got a "Special" instruction preamble. Which one is it? */
8141 if (code
[12] == 0x87 && code
[13] == 0xDB /* xchgl %ebx,%ebx */) {
8142 /* %EDX = client_request ( %EAX ) */
8143 DIP("%%edx = client_request ( %%eax )\n");
8145 jmp_lit(&dres
, Ijk_ClientReq
, guest_EIP_bbstart
+delta
);
8146 vassert(dres
.whatNext
== Dis_StopHere
);
8147 goto decode_success
;
8150 if (code
[12] == 0x87 && code
[13] == 0xC9 /* xchgl %ecx,%ecx */) {
8151 /* %EAX = guest_NRADDR */
8152 DIP("%%eax = guest_NRADDR\n");
8154 putIReg(4, R_EAX
, IRExpr_Get( OFFB_NRADDR
, Ity_I32
));
8155 goto decode_success
;
8158 if (code
[12] == 0x87 && code
[13] == 0xD2 /* xchgl %edx,%edx */) {
8159 /* call-noredir *%EAX */
8160 DIP("call-noredir *%%eax\n");
8162 t1
= newTemp(Ity_I32
);
8163 assign(t1
, getIReg(4,R_EAX
));
8164 t2
= newTemp(Ity_I32
);
8165 assign(t2
, binop(Iop_Sub32
, getIReg(4,R_ESP
), mkU32(4)));
8166 putIReg(4, R_ESP
, mkexpr(t2
));
8167 storeLE( mkexpr(t2
), mkU32(guest_EIP_bbstart
+delta
));
8168 jmp_treg(&dres
, Ijk_NoRedir
, t1
);
8169 vassert(dres
.whatNext
== Dis_StopHere
);
8170 goto decode_success
;
8173 if (code
[12] == 0x87 && code
[13] == 0xFF /* xchgl %edi,%edi */) {
8175 DIP("IR injection\n");
8176 vex_inject_ir(irsb
, Iend_LE
);
8178 // Invalidate the current insn. The reason is that the IRop we're
8179 // injecting here can change. In which case the translation has to
8180 // be redone. For ease of handling, we simply invalidate all the
8182 stmt(IRStmt_Put(OFFB_CMSTART
, mkU32(guest_EIP_curr_instr
)));
8183 stmt(IRStmt_Put(OFFB_CMLEN
, mkU32(14)));
8187 stmt( IRStmt_Put( OFFB_EIP
, mkU32(guest_EIP_bbstart
+ delta
) ) );
8188 dres
.whatNext
= Dis_StopHere
;
8189 dres
.jk_StopHere
= Ijk_InvalICache
;
8190 goto decode_success
;
8192 /* We don't know what it is. */
8193 goto decode_failure
;
8198 /* Handle a couple of weird-ass NOPs that have been observed in the
8201 const UChar
* code
= guest_code
+ delta
;
8202 /* Sun's JVM 1.5.0 uses the following as a NOP:
8203 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */
8204 if (code
[0] == 0x26 && code
[1] == 0x2E && code
[2] == 0x64
8205 && code
[3] == 0x65 && code
[4] == 0x90) {
8206 DIP("%%es:%%cs:%%fs:%%gs:nop\n");
8208 goto decode_success
;
8210 /* Don't barf on recent binutils padding,
8211 all variants of which are: nopw %cs:0x0(%eax,%eax,1)
8212 66 2e 0f 1f 84 00 00 00 00 00
8213 66 66 2e 0f 1f 84 00 00 00 00 00
8214 66 66 66 2e 0f 1f 84 00 00 00 00 00
8215 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8216 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8217 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8219 if (code
[0] == 0x66) {
8221 for (data16_cnt
= 1; data16_cnt
< 6; data16_cnt
++)
8222 if (code
[data16_cnt
] != 0x66)
8224 if (code
[data16_cnt
] == 0x2E && code
[data16_cnt
+ 1] == 0x0F
8225 && code
[data16_cnt
+ 2] == 0x1F && code
[data16_cnt
+ 3] == 0x84
8226 && code
[data16_cnt
+ 4] == 0x00 && code
[data16_cnt
+ 5] == 0x00
8227 && code
[data16_cnt
+ 6] == 0x00 && code
[data16_cnt
+ 7] == 0x00
8228 && code
[data16_cnt
+ 8] == 0x00 ) {
8229 DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n");
8230 delta
+= 9 + data16_cnt
;
8231 goto decode_success
;
8235 // Intel CET requires the following opcodes to be treated as NOPs
8236 // with any prefix and ModRM, SIB and disp combination:
8237 // "0F 19", "0F 1C", "0F 1D", "0F 1E", "0F 1F"
8238 UInt opcode_index
= 0;
8239 // Skip any prefix combination
8240 UInt addr_override
= 0;
8242 Bool is_prefix
= True
;
8244 switch (code
[opcode_index
]) {
8253 case 0x26: case 0x3E: // if we set segment override here,
8254 case 0x64: case 0x65: // disAMode segfaults
8255 case 0x2E: case 0x36:
8256 case 0xF0: case 0xF2: case 0xF3:
8264 if (code
[opcode_index
] == 0x0F) {
8265 switch (code
[opcode_index
+1]) {
8267 case 0x1C: case 0x1D:
8268 case 0x1E: case 0x1F:
8269 delta
+= opcode_index
+2;
8270 modrm
= getUChar(delta
);
8271 if (epartIsReg(modrm
)) {
8273 DIP("nop%c\n", nameISize(temp_sz
));
8276 addr
= disAMode(&alen
, 0/*"no sorb"*/, delta
, dis_buf
);
8277 delta
+= alen
- addr_override
;
8278 DIP("nop%c %s\n", nameISize(temp_sz
), dis_buf
);
8280 goto decode_success
;
8286 /* Normal instruction handling starts here. */
8288 /* Deal with some but not all prefixes:
8291 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:)
8292 Not dealt with (left in place):
8297 if (n_prefixes
> 7) goto decode_failure
;
8298 pre
= getUChar(delta
);
8307 case 0x3E: /* %DS: */
8308 case 0x26: /* %ES: */
8309 case 0x64: /* %FS: */
8310 case 0x65: /* %GS: */
8311 case 0x36: /* %SS: */
8313 goto decode_failure
; /* only one seg override allowed */
8316 case 0x2E: { /* %CS: */
8317 /* 2E prefix on a conditional branch instruction is a
8318 branch-prediction hint, which can safely be ignored. */
8319 UChar op1
= getIByte(delta
+1);
8320 UChar op2
= getIByte(delta
+2);
8321 if ((op1
>= 0x70 && op1
<= 0x7F)
8323 || (op1
== 0x0F && op2
>= 0x80 && op2
<= 0x8F)) {
8324 if (0) vex_printf("vex x86->IR: ignoring branch hint\n");
8326 /* All other CS override cases are not handled */
8327 goto decode_failure
;
8340 /* Now we should be looking at the primary opcode byte or the
8341 leading F2 or F3. Check that any LOCK prefix is actually
8345 if (can_be_used_with_LOCK_prefix( &guest_code
[delta
] )) {
8348 *expect_CAS
= False
;
8349 goto decode_failure
;
8354 /* ---------------------------------------------------- */
8355 /* --- The SSE decoder. --- */
8356 /* ---------------------------------------------------- */
8358 /* What did I do to deserve SSE ? Perhaps I was really bad in a
8361 /* Note, this doesn't handle SSE2 or SSE3. That is handled in a
8362 later section, further on. */
8364 insn
= &guest_code
[delta
];
8366 /* Treat fxsave specially. It should be doable even on an SSE0
8367 (Pentium-II class) CPU. Hence be prepared to handle it on
8368 any subarchitecture variant.
8371 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
8372 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xAE
8373 && !epartIsReg(insn
[2]) && gregOfRM(insn
[2]) == 0) {
8375 modrm
= getIByte(delta
+2);
8377 vassert(!epartIsReg(modrm
));
8379 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8381 gen_SEGV_if_not_16_aligned(addr
);
8383 DIP("fxsave %s\n", dis_buf
);
8385 /* Uses dirty helper:
8386 void x86g_do_FXSAVE ( VexGuestX86State*, UInt ) */
8387 d
= unsafeIRDirty_0_N (
8389 "x86g_dirtyhelper_FXSAVE",
8390 &x86g_dirtyhelper_FXSAVE
,
8391 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
8394 /* declare we're writing memory */
8396 d
->mAddr
= mkexpr(addr
);
8397 d
->mSize
= 464; /* according to recent Intel docs */
8399 /* declare we're reading guest state */
8401 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
8403 d
->fxState
[0].fx
= Ifx_Read
;
8404 d
->fxState
[0].offset
= OFFB_FTOP
;
8405 d
->fxState
[0].size
= sizeof(UInt
);
8407 d
->fxState
[1].fx
= Ifx_Read
;
8408 d
->fxState
[1].offset
= OFFB_FPREGS
;
8409 d
->fxState
[1].size
= 8 * sizeof(ULong
);
8411 d
->fxState
[2].fx
= Ifx_Read
;
8412 d
->fxState
[2].offset
= OFFB_FPTAGS
;
8413 d
->fxState
[2].size
= 8 * sizeof(UChar
);
8415 d
->fxState
[3].fx
= Ifx_Read
;
8416 d
->fxState
[3].offset
= OFFB_FPROUND
;
8417 d
->fxState
[3].size
= sizeof(UInt
);
8419 d
->fxState
[4].fx
= Ifx_Read
;
8420 d
->fxState
[4].offset
= OFFB_FC3210
;
8421 d
->fxState
[4].size
= sizeof(UInt
);
8423 d
->fxState
[5].fx
= Ifx_Read
;
8424 d
->fxState
[5].offset
= OFFB_XMM0
;
8425 d
->fxState
[5].size
= 8 * sizeof(U128
);
8427 d
->fxState
[6].fx
= Ifx_Read
;
8428 d
->fxState
[6].offset
= OFFB_SSEROUND
;
8429 d
->fxState
[6].size
= sizeof(UInt
);
8431 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8432 images are packed back-to-back. If not, the value of
8433 d->fxState[5].size is wrong. */
8434 vassert(16 == sizeof(U128
));
8435 vassert(OFFB_XMM7
== (OFFB_XMM0
+ 7 * 16));
8437 stmt( IRStmt_Dirty(d
) );
8439 goto decode_success
;
8442 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
8443 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xAE
8444 && !epartIsReg(insn
[2]) && gregOfRM(insn
[2]) == 1) {
8446 modrm
= getIByte(delta
+2);
8448 vassert(!epartIsReg(modrm
));
8450 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8452 gen_SEGV_if_not_16_aligned(addr
);
8454 DIP("fxrstor %s\n", dis_buf
);
8456 /* Uses dirty helper:
8457 VexEmNote x86g_do_FXRSTOR ( VexGuestX86State*, UInt )
8459 the VexEmNote value is simply ignored (unlike for FRSTOR)
8461 d
= unsafeIRDirty_0_N (
8463 "x86g_dirtyhelper_FXRSTOR",
8464 &x86g_dirtyhelper_FXRSTOR
,
8465 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
8468 /* declare we're reading memory */
8470 d
->mAddr
= mkexpr(addr
);
8471 d
->mSize
= 464; /* according to recent Intel docs */
8473 /* declare we're writing guest state */
8475 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
8477 d
->fxState
[0].fx
= Ifx_Write
;
8478 d
->fxState
[0].offset
= OFFB_FTOP
;
8479 d
->fxState
[0].size
= sizeof(UInt
);
8481 d
->fxState
[1].fx
= Ifx_Write
;
8482 d
->fxState
[1].offset
= OFFB_FPREGS
;
8483 d
->fxState
[1].size
= 8 * sizeof(ULong
);
8485 d
->fxState
[2].fx
= Ifx_Write
;
8486 d
->fxState
[2].offset
= OFFB_FPTAGS
;
8487 d
->fxState
[2].size
= 8 * sizeof(UChar
);
8489 d
->fxState
[3].fx
= Ifx_Write
;
8490 d
->fxState
[3].offset
= OFFB_FPROUND
;
8491 d
->fxState
[3].size
= sizeof(UInt
);
8493 d
->fxState
[4].fx
= Ifx_Write
;
8494 d
->fxState
[4].offset
= OFFB_FC3210
;
8495 d
->fxState
[4].size
= sizeof(UInt
);
8497 d
->fxState
[5].fx
= Ifx_Write
;
8498 d
->fxState
[5].offset
= OFFB_XMM0
;
8499 d
->fxState
[5].size
= 8 * sizeof(U128
);
8501 d
->fxState
[6].fx
= Ifx_Write
;
8502 d
->fxState
[6].offset
= OFFB_SSEROUND
;
8503 d
->fxState
[6].size
= sizeof(UInt
);
8505 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8506 images are packed back-to-back. If not, the value of
8507 d->fxState[5].size is wrong. */
8508 vassert(16 == sizeof(U128
));
8509 vassert(OFFB_XMM7
== (OFFB_XMM0
+ 7 * 16));
8511 stmt( IRStmt_Dirty(d
) );
8513 goto decode_success
;
8516 /* ------ SSE decoder main ------ */
8518 /* Skip parts of the decoder which don't apply given the stated
8519 guest subarchitecture. */
8520 if (archinfo
->hwcaps
== 0/*baseline, no sse at all*/)
8521 goto after_sse_decoders
;
8523 /* With mmxext only some extended MMX instructions are recognized.
8524 The mmxext instructions are MASKMOVQ MOVNTQ PAVGB PAVGW PMAXSW
8525 PMAXUB PMINSW PMINUB PMULHUW PSADBW PSHUFW PEXTRW PINSRW PMOVMSKB
8526 PREFETCHNTA PREFETCHT0 PREFETCHT1 PREFETCHT2 SFENCE
8528 http://support.amd.com/us/Embedded_TechDocs/22466.pdf
8529 https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions */
8531 if (archinfo
->hwcaps
== VEX_HWCAPS_X86_MMXEXT
/*integer only sse1 subset*/)
8534 /* Otherwise we must be doing sse1 or sse2, so we can at least try
8537 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
8538 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x58) {
8539 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "addps", Iop_Add32Fx4
);
8540 goto decode_success
;
8543 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
8544 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x58) {
8546 delta
= dis_SSE_E_to_G_lo32( sorb
, delta
+3, "addss", Iop_Add32F0x4
);
8547 goto decode_success
;
8550 /* 0F 55 = ANDNPS -- G = (not G) and E */
8551 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x55) {
8552 delta
= dis_SSE_E_to_G_all_invG( sorb
, delta
+2, "andnps", Iop_AndV128
);
8553 goto decode_success
;
8556 /* 0F 54 = ANDPS -- G = G and E */
8557 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x54) {
8558 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "andps", Iop_AndV128
);
8559 goto decode_success
;
8562 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
8563 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xC2) {
8564 delta
= dis_SSEcmp_E_to_G( sorb
, delta
+2, "cmpps", True
, 4 );
8565 goto decode_success
;
8568 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
8569 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0xC2) {
8571 delta
= dis_SSEcmp_E_to_G( sorb
, delta
+3, "cmpss", False
, 4 );
8572 goto decode_success
;
8575 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
8576 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
8577 if (sz
== 4 && insn
[0] == 0x0F && (insn
[1] == 0x2F || insn
[1] == 0x2E)) {
8578 IRTemp argL
= newTemp(Ity_F32
);
8579 IRTemp argR
= newTemp(Ity_F32
);
8580 modrm
= getIByte(delta
+2);
8581 if (epartIsReg(modrm
)) {
8582 assign( argR
, getXMMRegLane32F( eregOfRM(modrm
), 0/*lowest lane*/ ) );
8584 DIP("[u]comiss %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
8585 nameXMMReg(gregOfRM(modrm
)) );
8587 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8588 assign( argR
, loadLE(Ity_F32
, mkexpr(addr
)) );
8590 DIP("[u]comiss %s,%s\n", dis_buf
,
8591 nameXMMReg(gregOfRM(modrm
)) );
8593 assign( argL
, getXMMRegLane32F( gregOfRM(modrm
), 0/*lowest lane*/ ) );
8595 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
8596 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
8601 unop(Iop_F32toF64
,mkexpr(argL
)),
8602 unop(Iop_F32toF64
,mkexpr(argR
))),
8605 /* Set NDEP even though it isn't used. This makes redundant-PUT
8606 elimination of previous stores to this field work better. */
8607 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
8608 goto decode_success
;
8611 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
8613 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x2A) {
8614 IRTemp arg64
= newTemp(Ity_I64
);
8615 IRTemp rmode
= newTemp(Ity_I32
);
8618 modrm
= getIByte(delta
+2);
8619 if (epartIsReg(modrm
)) {
8620 /* Only switch to MMX mode if the source is a MMX register.
8621 See comments on CVTPI2PD for details. Fixes #357059. */
8623 assign( arg64
, getMMXReg(eregOfRM(modrm
)) );
8625 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
8626 nameXMMReg(gregOfRM(modrm
)));
8628 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8629 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
8631 DIP("cvtpi2ps %s,%s\n", dis_buf
,
8632 nameXMMReg(gregOfRM(modrm
)) );
8635 assign( rmode
, get_sse_roundingmode() );
8642 unop(Iop_64to32
, mkexpr(arg64
)) )) );
8649 unop(Iop_64HIto32
, mkexpr(arg64
)) )) );
8651 goto decode_success
;
8654 /* F3 0F 2A = CVTSI2SS -- convert I32 in mem/ireg to F32 in low
8656 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x2A) {
8657 IRTemp arg32
= newTemp(Ity_I32
);
8658 IRTemp rmode
= newTemp(Ity_I32
);
8661 modrm
= getIByte(delta
+3);
8662 if (epartIsReg(modrm
)) {
8663 assign( arg32
, getIReg(4, eregOfRM(modrm
)) );
8665 DIP("cvtsi2ss %s,%s\n", nameIReg(4, eregOfRM(modrm
)),
8666 nameXMMReg(gregOfRM(modrm
)));
8668 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
8669 assign( arg32
, loadLE(Ity_I32
, mkexpr(addr
)) );
8671 DIP("cvtsi2ss %s,%s\n", dis_buf
,
8672 nameXMMReg(gregOfRM(modrm
)) );
8675 assign( rmode
, get_sse_roundingmode() );
8681 unop(Iop_I32StoF64
, mkexpr(arg32
)) ) );
8683 goto decode_success
;
8686 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
8687 I32 in mmx, according to prevailing SSE rounding mode */
8688 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
8689 I32 in mmx, rounding towards zero */
8690 if (sz
== 4 && insn
[0] == 0x0F && (insn
[1] == 0x2D || insn
[1] == 0x2C)) {
8691 IRTemp dst64
= newTemp(Ity_I64
);
8692 IRTemp rmode
= newTemp(Ity_I32
);
8693 IRTemp f32lo
= newTemp(Ity_F32
);
8694 IRTemp f32hi
= newTemp(Ity_F32
);
8695 Bool r2zero
= toBool(insn
[1] == 0x2C);
8698 modrm
= getIByte(delta
+2);
8700 if (epartIsReg(modrm
)) {
8702 assign(f32lo
, getXMMRegLane32F(eregOfRM(modrm
), 0));
8703 assign(f32hi
, getXMMRegLane32F(eregOfRM(modrm
), 1));
8704 DIP("cvt%sps2pi %s,%s\n", r2zero
? "t" : "",
8705 nameXMMReg(eregOfRM(modrm
)),
8706 nameMMXReg(gregOfRM(modrm
)));
8708 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8709 assign(f32lo
, loadLE(Ity_F32
, mkexpr(addr
)));
8710 assign(f32hi
, loadLE(Ity_F32
, binop( Iop_Add32
,
8714 DIP("cvt%sps2pi %s,%s\n", r2zero
? "t" : "",
8716 nameMMXReg(gregOfRM(modrm
)));
8720 assign(rmode
, mkU32((UInt
)Irrm_ZERO
) );
8722 assign( rmode
, get_sse_roundingmode() );
8727 binop( Iop_32HLto64
,
8728 binop( Iop_F64toI32S
,
8730 unop( Iop_F32toF64
, mkexpr(f32hi
) ) ),
8731 binop( Iop_F64toI32S
,
8733 unop( Iop_F32toF64
, mkexpr(f32lo
) ) )
8737 putMMXReg(gregOfRM(modrm
), mkexpr(dst64
));
8738 goto decode_success
;
8741 /* F3 0F 2D = CVTSS2SI -- convert F32 in mem/low quarter xmm to
8742 I32 in ireg, according to prevailing SSE rounding mode */
8743 /* F3 0F 2C = CVTTSS2SI -- convert F32 in mem/low quarter xmm to
8744 I32 in ireg, rounding towards zero */
8745 if (insn
[0] == 0xF3 && insn
[1] == 0x0F
8746 && (insn
[2] == 0x2D || insn
[2] == 0x2C)) {
8747 IRTemp rmode
= newTemp(Ity_I32
);
8748 IRTemp f32lo
= newTemp(Ity_F32
);
8749 Bool r2zero
= toBool(insn
[2] == 0x2C);
8752 modrm
= getIByte(delta
+3);
8753 if (epartIsReg(modrm
)) {
8755 assign(f32lo
, getXMMRegLane32F(eregOfRM(modrm
), 0));
8756 DIP("cvt%sss2si %s,%s\n", r2zero
? "t" : "",
8757 nameXMMReg(eregOfRM(modrm
)),
8758 nameIReg(4, gregOfRM(modrm
)));
8760 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
8761 assign(f32lo
, loadLE(Ity_F32
, mkexpr(addr
)));
8763 DIP("cvt%sss2si %s,%s\n", r2zero
? "t" : "",
8765 nameIReg(4, gregOfRM(modrm
)));
8769 assign( rmode
, mkU32((UInt
)Irrm_ZERO
) );
8771 assign( rmode
, get_sse_roundingmode() );
8774 putIReg(4, gregOfRM(modrm
),
8775 binop( Iop_F64toI32S
,
8777 unop( Iop_F32toF64
, mkexpr(f32lo
) ) )
8780 goto decode_success
;
8783 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
8784 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x5E) {
8785 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "divps", Iop_Div32Fx4
);
8786 goto decode_success
;
8789 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
8790 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x5E) {
8792 delta
= dis_SSE_E_to_G_lo32( sorb
, delta
+3, "divss", Iop_Div32F0x4
);
8793 goto decode_success
;
8796 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
8797 if (insn
[0] == 0x0F && insn
[1] == 0xAE
8798 && !epartIsReg(insn
[2]) && gregOfRM(insn
[2]) == 2) {
8800 IRTemp t64
= newTemp(Ity_I64
);
8801 IRTemp ew
= newTemp(Ity_I32
);
8803 modrm
= getIByte(delta
+2);
8804 vassert(!epartIsReg(modrm
));
8807 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8809 DIP("ldmxcsr %s\n", dis_buf
);
8811 /* The only thing we observe in %mxcsr is the rounding mode.
8812 Therefore, pass the 32-bit value (SSE native-format control
8813 word) to a clean helper, getting back a 64-bit value, the
8814 lower half of which is the SSEROUND value to store, and the
8815 upper half of which is the emulation-warning token which may
8818 /* ULong x86h_check_ldmxcsr ( UInt ); */
8819 assign( t64
, mkIRExprCCall(
8820 Ity_I64
, 0/*regparms*/,
8821 "x86g_check_ldmxcsr",
8822 &x86g_check_ldmxcsr
,
8823 mkIRExprVec_1( loadLE(Ity_I32
, mkexpr(addr
)) )
8827 put_sse_roundingmode( unop(Iop_64to32
, mkexpr(t64
)) );
8828 assign( ew
, unop(Iop_64HIto32
, mkexpr(t64
) ) );
8829 put_emwarn( mkexpr(ew
) );
8830 /* Finally, if an emulation warning was reported, side-exit to
8831 the next insn, reporting the warning, so that Valgrind's
8832 dispatcher sees the warning. */
8835 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
8837 IRConst_U32( ((Addr32
)guest_EIP_bbstart
)+delta
),
8841 goto decode_success
;
8845 /* mmxext sse1 subset starts here. mmxext only arches will parse
8846 only this subset of the sse1 instructions. */
8849 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8850 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
8851 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xF7) {
8853 delta
= dis_MMX( &ok
, sorb
, sz
, delta
+1 );
8855 goto decode_failure
;
8856 goto decode_success
;
8859 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8860 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
8861 Intel manual does not say anything about the usual business of
8862 the FP reg tags getting trashed whenever an MMX insn happens.
8863 So we just leave them alone.
8865 if (insn
[0] == 0x0F && insn
[1] == 0xE7) {
8866 modrm
= getIByte(delta
+2);
8867 if (sz
== 4 && !epartIsReg(modrm
)) {
8868 /* do_MMX_preamble(); Intel docs don't specify this */
8869 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8870 storeLE( mkexpr(addr
), getMMXReg(gregOfRM(modrm
)) );
8871 DIP("movntq %s,%s\n", dis_buf
,
8872 nameMMXReg(gregOfRM(modrm
)));
8874 goto decode_success
;
8876 /* else fall through */
8879 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8880 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
8881 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xE0) {
8883 delta
= dis_MMXop_regmem_to_reg (
8884 sorb
, delta
+2, insn
[1], "pavgb", False
);
8885 goto decode_success
;
8888 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8889 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
8890 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xE3) {
8892 delta
= dis_MMXop_regmem_to_reg (
8893 sorb
, delta
+2, insn
[1], "pavgw", False
);
8894 goto decode_success
;
8897 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8898 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
8899 zero-extend of it in ireg(G). */
8900 if (insn
[0] == 0x0F && insn
[1] == 0xC5) {
8902 if (sz
== 4 && epartIsReg(modrm
)) {
8903 IRTemp sV
= newTemp(Ity_I64
);
8904 t5
= newTemp(Ity_I16
);
8906 assign(sV
, getMMXReg(eregOfRM(modrm
)));
8907 breakup64to16s( sV
, &t3
, &t2
, &t1
, &t0
);
8908 switch (insn
[3] & 3) {
8909 case 0: assign(t5
, mkexpr(t0
)); break;
8910 case 1: assign(t5
, mkexpr(t1
)); break;
8911 case 2: assign(t5
, mkexpr(t2
)); break;
8912 case 3: assign(t5
, mkexpr(t3
)); break;
8913 default: vassert(0); /*NOTREACHED*/
8915 putIReg(4, gregOfRM(modrm
), unop(Iop_16Uto32
, mkexpr(t5
)));
8916 DIP("pextrw $%d,%s,%s\n",
8917 (Int
)insn
[3], nameMMXReg(eregOfRM(modrm
)),
8918 nameIReg(4,gregOfRM(modrm
)));
8920 goto decode_success
;
8922 /* else fall through */
8925 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8926 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
8927 put it into the specified lane of mmx(G). */
8928 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xC4) {
8929 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
8930 mmx reg. t4 is the new lane value. t5 is the original
8931 mmx value. t6 is the new mmx value. */
8933 t4
= newTemp(Ity_I16
);
8934 t5
= newTemp(Ity_I64
);
8935 t6
= newTemp(Ity_I64
);
8939 assign(t5
, getMMXReg(gregOfRM(modrm
)));
8940 breakup64to16s( t5
, &t3
, &t2
, &t1
, &t0
);
8942 if (epartIsReg(modrm
)) {
8943 assign(t4
, getIReg(2, eregOfRM(modrm
)));
8946 DIP("pinsrw $%d,%s,%s\n", lane
,
8947 nameIReg(2,eregOfRM(modrm
)),
8948 nameMMXReg(gregOfRM(modrm
)));
8950 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8952 lane
= insn
[3+alen
-1];
8953 assign(t4
, loadLE(Ity_I16
, mkexpr(addr
)));
8954 DIP("pinsrw $%d,%s,%s\n", lane
,
8956 nameMMXReg(gregOfRM(modrm
)));
8960 case 0: assign(t6
, mk64from16s(t3
,t2
,t1
,t4
)); break;
8961 case 1: assign(t6
, mk64from16s(t3
,t2
,t4
,t0
)); break;
8962 case 2: assign(t6
, mk64from16s(t3
,t4
,t1
,t0
)); break;
8963 case 3: assign(t6
, mk64from16s(t4
,t2
,t1
,t0
)); break;
8964 default: vassert(0); /*NOTREACHED*/
8966 putMMXReg(gregOfRM(modrm
), mkexpr(t6
));
8967 goto decode_success
;
8970 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8971 /* 0F EE = PMAXSW -- 16x4 signed max */
8972 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xEE) {
8974 delta
= dis_MMXop_regmem_to_reg (
8975 sorb
, delta
+2, insn
[1], "pmaxsw", False
);
8976 goto decode_success
;
8979 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8980 /* 0F DE = PMAXUB -- 8x8 unsigned max */
8981 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xDE) {
8983 delta
= dis_MMXop_regmem_to_reg (
8984 sorb
, delta
+2, insn
[1], "pmaxub", False
);
8985 goto decode_success
;
8988 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8989 /* 0F EA = PMINSW -- 16x4 signed min */
8990 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xEA) {
8992 delta
= dis_MMXop_regmem_to_reg (
8993 sorb
, delta
+2, insn
[1], "pminsw", False
);
8994 goto decode_success
;
8997 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8998 /* 0F DA = PMINUB -- 8x8 unsigned min */
8999 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xDA) {
9001 delta
= dis_MMXop_regmem_to_reg (
9002 sorb
, delta
+2, insn
[1], "pminub", False
);
9003 goto decode_success
;
9006 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9007 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
9008 mmx(E), turn them into a byte, and put zero-extend of it in
9010 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xD7) {
9012 if (epartIsReg(modrm
)) {
9014 t0
= newTemp(Ity_I64
);
9015 t1
= newTemp(Ity_I32
);
9016 assign(t0
, getMMXReg(eregOfRM(modrm
)));
9017 assign(t1
, unop(Iop_8Uto32
, unop(Iop_GetMSBs8x8
, mkexpr(t0
))));
9018 putIReg(4, gregOfRM(modrm
), mkexpr(t1
));
9019 DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
9020 nameIReg(4,gregOfRM(modrm
)));
9022 goto decode_success
;
9024 /* else fall through */
9027 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9028 /* 0F E4 = PMULHUW -- 16x4 hi-half of unsigned widening multiply */
9029 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xE4) {
9031 delta
= dis_MMXop_regmem_to_reg (
9032 sorb
, delta
+2, insn
[1], "pmuluh", False
);
9033 goto decode_success
;
9036 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
9037 /* 0F 18 /1 = PREFETCHT0 -- with various different hints */
9038 /* 0F 18 /2 = PREFETCHT1 */
9039 /* 0F 18 /3 = PREFETCHT2 */
9040 if (insn
[0] == 0x0F && insn
[1] == 0x18
9041 && !epartIsReg(insn
[2])
9042 && gregOfRM(insn
[2]) >= 0 && gregOfRM(insn
[2]) <= 3) {
9043 const HChar
* hintstr
= "??";
9045 modrm
= getIByte(delta
+2);
9046 vassert(!epartIsReg(modrm
));
9048 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9051 switch (gregOfRM(modrm
)) {
9052 case 0: hintstr
= "nta"; break;
9053 case 1: hintstr
= "t0"; break;
9054 case 2: hintstr
= "t1"; break;
9055 case 3: hintstr
= "t2"; break;
9056 default: vassert(0); /*NOTREACHED*/
9059 DIP("prefetch%s %s\n", hintstr
, dis_buf
);
9060 goto decode_success
;
9063 /* 0F 0D /0 = PREFETCH m8 -- 3DNow! prefetch */
9064 /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */
9065 if (insn
[0] == 0x0F && insn
[1] == 0x0D
9066 && !epartIsReg(insn
[2])
9067 && gregOfRM(insn
[2]) >= 0 && gregOfRM(insn
[2]) <= 1) {
9068 const HChar
* hintstr
= "??";
9070 modrm
= getIByte(delta
+2);
9071 vassert(!epartIsReg(modrm
));
9073 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9076 switch (gregOfRM(modrm
)) {
9077 case 0: hintstr
= ""; break;
9078 case 1: hintstr
= "w"; break;
9079 default: vassert(0); /*NOTREACHED*/
9082 DIP("prefetch%s %s\n", hintstr
, dis_buf
);
9083 goto decode_success
;
9086 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9087 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
9088 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xF6) {
9090 delta
= dis_MMXop_regmem_to_reg (
9091 sorb
, delta
+2, insn
[1], "psadbw", False
);
9092 goto decode_success
;
9095 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9096 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
9097 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x70) {
9099 IRTemp sV
, dV
, s3
, s2
, s1
, s0
;
9100 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
9101 sV
= newTemp(Ity_I64
);
9102 dV
= newTemp(Ity_I64
);
9105 if (epartIsReg(modrm
)) {
9106 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
9107 order
= (Int
)insn
[3];
9109 DIP("pshufw $%d,%s,%s\n", order
,
9110 nameMMXReg(eregOfRM(modrm
)),
9111 nameMMXReg(gregOfRM(modrm
)));
9113 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9114 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
9115 order
= (Int
)insn
[2+alen
];
9117 DIP("pshufw $%d,%s,%s\n", order
,
9119 nameMMXReg(gregOfRM(modrm
)));
9121 breakup64to16s( sV
, &s3
, &s2
, &s1
, &s0
);
9124 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9126 mk64from16s( SEL((order
>>6)&3), SEL((order
>>4)&3),
9127 SEL((order
>>2)&3), SEL((order
>>0)&3) )
9129 putMMXReg(gregOfRM(modrm
), mkexpr(dV
));
9131 goto decode_success
;
9134 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
9135 if (insn
[0] == 0x0F && insn
[1] == 0xAE
9136 && epartIsReg(insn
[2]) && gregOfRM(insn
[2]) == 7) {
9139 /* Insert a memory fence. It's sometimes important that these
9140 are carried through to the generated code. */
9141 stmt( IRStmt_MBE(Imbe_Fence
) );
9143 goto decode_success
;
9146 /* End of mmxext sse1 subset. No more sse parsing for mmxext only arches. */
9147 if (archinfo
->hwcaps
== VEX_HWCAPS_X86_MMXEXT
/*integer only sse1 subset*/)
9148 goto after_sse_decoders
;
9151 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
9152 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x5F) {
9153 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "maxps", Iop_Max32Fx4
);
9154 goto decode_success
;
9157 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
9158 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x5F) {
9160 delta
= dis_SSE_E_to_G_lo32( sorb
, delta
+3, "maxss", Iop_Max32F0x4
);
9161 goto decode_success
;
9164 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
9165 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x5D) {
9166 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "minps", Iop_Min32Fx4
);
9167 goto decode_success
;
9170 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
9171 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x5D) {
9173 delta
= dis_SSE_E_to_G_lo32( sorb
, delta
+3, "minss", Iop_Min32F0x4
);
9174 goto decode_success
;
9177 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
9178 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
9179 if (sz
== 4 && insn
[0] == 0x0F && (insn
[1] == 0x28 || insn
[1] == 0x10)) {
9180 modrm
= getIByte(delta
+2);
9181 if (epartIsReg(modrm
)) {
9182 putXMMReg( gregOfRM(modrm
),
9183 getXMMReg( eregOfRM(modrm
) ));
9184 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9185 nameXMMReg(gregOfRM(modrm
)));
9188 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9189 if (insn
[1] == 0x28/*movaps*/)
9190 gen_SEGV_if_not_16_aligned( addr
);
9191 putXMMReg( gregOfRM(modrm
),
9192 loadLE(Ity_V128
, mkexpr(addr
)) );
9193 DIP("mov[ua]ps %s,%s\n", dis_buf
,
9194 nameXMMReg(gregOfRM(modrm
)));
9197 goto decode_success
;
9200 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
9201 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
9202 if (sz
== 4 && insn
[0] == 0x0F
9203 && (insn
[1] == 0x29 || insn
[1] == 0x11)) {
9204 modrm
= getIByte(delta
+2);
9205 if (epartIsReg(modrm
)) {
9206 /* fall through; awaiting test case */
9208 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9209 if (insn
[1] == 0x29/*movaps*/)
9210 gen_SEGV_if_not_16_aligned( addr
);
9211 storeLE( mkexpr(addr
), getXMMReg(gregOfRM(modrm
)) );
9212 DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm
)),
9215 goto decode_success
;
9219 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
9220 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
9221 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x16) {
9222 modrm
= getIByte(delta
+2);
9223 if (epartIsReg(modrm
)) {
9225 putXMMRegLane64( gregOfRM(modrm
), 1/*upper lane*/,
9226 getXMMRegLane64( eregOfRM(modrm
), 0 ) );
9227 DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9228 nameXMMReg(gregOfRM(modrm
)));
9230 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9232 putXMMRegLane64( gregOfRM(modrm
), 1/*upper lane*/,
9233 loadLE(Ity_I64
, mkexpr(addr
)) );
9234 DIP("movhps %s,%s\n", dis_buf
,
9235 nameXMMReg( gregOfRM(modrm
) ));
9237 goto decode_success
;
9240 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
9241 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x17) {
9242 if (!epartIsReg(insn
[2])) {
9244 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
9246 storeLE( mkexpr(addr
),
9247 getXMMRegLane64( gregOfRM(insn
[2]),
9248 1/*upper lane*/ ) );
9249 DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn
[2]) ),
9251 goto decode_success
;
9253 /* else fall through */
9256 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
9257 /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
9258 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x12) {
9259 modrm
= getIByte(delta
+2);
9260 if (epartIsReg(modrm
)) {
9262 putXMMRegLane64( gregOfRM(modrm
),
9264 getXMMRegLane64( eregOfRM(modrm
), 1 ));
9265 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm
)),
9266 nameXMMReg(gregOfRM(modrm
)));
9268 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9270 putXMMRegLane64( gregOfRM(modrm
), 0/*lower lane*/,
9271 loadLE(Ity_I64
, mkexpr(addr
)) );
9272 DIP("movlps %s, %s\n",
9273 dis_buf
, nameXMMReg( gregOfRM(modrm
) ));
9275 goto decode_success
;
9278 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
9279 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x13) {
9280 if (!epartIsReg(insn
[2])) {
9282 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
9284 storeLE( mkexpr(addr
),
9285 getXMMRegLane64( gregOfRM(insn
[2]),
9286 0/*lower lane*/ ) );
9287 DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn
[2]) ),
9289 goto decode_success
;
9291 /* else fall through */
9294 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
9295 to 4 lowest bits of ireg(G) */
9296 if (insn
[0] == 0x0F && insn
[1] == 0x50) {
9297 modrm
= getIByte(delta
+2);
9298 if (sz
== 4 && epartIsReg(modrm
)) {
9300 t0
= newTemp(Ity_I32
);
9301 t1
= newTemp(Ity_I32
);
9302 t2
= newTemp(Ity_I32
);
9303 t3
= newTemp(Ity_I32
);
9305 src
= eregOfRM(modrm
);
9306 assign( t0
, binop( Iop_And32
,
9307 binop(Iop_Shr32
, getXMMRegLane32(src
,0), mkU8(31)),
9309 assign( t1
, binop( Iop_And32
,
9310 binop(Iop_Shr32
, getXMMRegLane32(src
,1), mkU8(30)),
9312 assign( t2
, binop( Iop_And32
,
9313 binop(Iop_Shr32
, getXMMRegLane32(src
,2), mkU8(29)),
9315 assign( t3
, binop( Iop_And32
,
9316 binop(Iop_Shr32
, getXMMRegLane32(src
,3), mkU8(28)),
9318 putIReg(4, gregOfRM(modrm
),
9320 binop(Iop_Or32
, mkexpr(t0
), mkexpr(t1
)),
9321 binop(Iop_Or32
, mkexpr(t2
), mkexpr(t3
))
9324 DIP("movmskps %s,%s\n", nameXMMReg(src
),
9325 nameIReg(4, gregOfRM(modrm
)));
9326 goto decode_success
;
9328 /* else fall through */
9331 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
9332 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
9333 if (insn
[0] == 0x0F && insn
[1] == 0x2B) {
9334 modrm
= getIByte(delta
+2);
9335 if (!epartIsReg(modrm
)) {
9336 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9337 gen_SEGV_if_not_16_aligned( addr
);
9338 storeLE( mkexpr(addr
), getXMMReg(gregOfRM(modrm
)) );
9339 DIP("movntp%s %s,%s\n", sz
==2 ? "d" : "s",
9341 nameXMMReg(gregOfRM(modrm
)));
9343 goto decode_success
;
9345 /* else fall through */
9348 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
9349 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
9350 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x10) {
9352 modrm
= getIByte(delta
+3);
9353 if (epartIsReg(modrm
)) {
9354 putXMMRegLane32( gregOfRM(modrm
), 0,
9355 getXMMRegLane32( eregOfRM(modrm
), 0 ));
9356 DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9357 nameXMMReg(gregOfRM(modrm
)));
9360 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
9361 /* zero bits 127:64 */
9362 putXMMRegLane64( gregOfRM(modrm
), 1, mkU64(0) );
9363 /* zero bits 63:32 */
9364 putXMMRegLane32( gregOfRM(modrm
), 1, mkU32(0) );
9365 /* write bits 31:0 */
9366 putXMMRegLane32( gregOfRM(modrm
), 0,
9367 loadLE(Ity_I32
, mkexpr(addr
)) );
9368 DIP("movss %s,%s\n", dis_buf
,
9369 nameXMMReg(gregOfRM(modrm
)));
9372 goto decode_success
;
9375 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
9377 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x11) {
9379 modrm
= getIByte(delta
+3);
9380 if (epartIsReg(modrm
)) {
9381 /* fall through, we don't yet have a test case */
9383 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
9384 storeLE( mkexpr(addr
),
9385 getXMMRegLane32(gregOfRM(modrm
), 0) );
9386 DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm
)),
9389 goto decode_success
;
9393 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
9394 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x59) {
9395 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "mulps", Iop_Mul32Fx4
);
9396 goto decode_success
;
9399 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
9400 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x59) {
9402 delta
= dis_SSE_E_to_G_lo32( sorb
, delta
+3, "mulss", Iop_Mul32F0x4
);
9403 goto decode_success
;
9406 /* 0F 56 = ORPS -- G = G or E */
9407 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x56) {
9408 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "orps", Iop_OrV128
);
9409 goto decode_success
;
9412 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
9413 if (insn
[0] == 0x0F && insn
[1] == 0x53) {
9415 delta
= dis_SSE_E_to_G_unary_all( sorb
, delta
+2,
9416 "rcpps", Iop_RecipEst32Fx4
);
9417 goto decode_success
;
9420 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
9421 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x53) {
9423 delta
= dis_SSE_E_to_G_unary_lo32( sorb
, delta
+3,
9424 "rcpss", Iop_RecipEst32F0x4
);
9425 goto decode_success
;
9428 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
9429 if (insn
[0] == 0x0F && insn
[1] == 0x52) {
9431 delta
= dis_SSE_E_to_G_unary_all( sorb
, delta
+2,
9432 "rsqrtps", Iop_RSqrtEst32Fx4
);
9433 goto decode_success
;
9436 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
9437 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x52) {
9439 delta
= dis_SSE_E_to_G_unary_lo32( sorb
, delta
+3,
9440 "rsqrtss", Iop_RSqrtEst32F0x4
);
9441 goto decode_success
;
9444 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
9445 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xC6) {
9448 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
9449 sV
= newTemp(Ity_V128
);
9450 dV
= newTemp(Ity_V128
);
9451 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
9453 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
9455 if (epartIsReg(modrm
)) {
9456 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
9457 select
= (Int
)insn
[3];
9459 DIP("shufps $%d,%s,%s\n", select
,
9460 nameXMMReg(eregOfRM(modrm
)),
9461 nameXMMReg(gregOfRM(modrm
)));
9463 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9464 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
9465 select
= (Int
)insn
[2+alen
];
9467 DIP("shufps $%d,%s,%s\n", select
,
9469 nameXMMReg(gregOfRM(modrm
)));
9472 breakup128to32s( dV
, &d3
, &d2
, &d1
, &d0
);
9473 breakup128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
9475 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
9476 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9480 mk128from32s( SELS((select
>>6)&3), SELS((select
>>4)&3),
9481 SELD((select
>>2)&3), SELD((select
>>0)&3) )
9487 goto decode_success
;
9490 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
9491 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x51) {
9492 delta
= dis_SSE_E_to_G_unary_all( sorb
, delta
+2,
9493 "sqrtps", Iop_Sqrt32Fx4
);
9494 goto decode_success
;
9497 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
9498 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x51) {
9500 delta
= dis_SSE_E_to_G_unary_lo32( sorb
, delta
+3,
9501 "sqrtss", Iop_Sqrt32F0x4
);
9502 goto decode_success
;
9505 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
9506 if (insn
[0] == 0x0F && insn
[1] == 0xAE
9507 && !epartIsReg(insn
[2]) && gregOfRM(insn
[2]) == 3) {
9508 modrm
= getIByte(delta
+2);
9510 vassert(!epartIsReg(modrm
));
9512 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9515 /* Fake up a native SSE mxcsr word. The only thing it depends
9516 on is SSEROUND[1:0], so call a clean helper to cook it up.
9518 /* UInt x86h_create_mxcsr ( UInt sseround ) */
9519 DIP("stmxcsr %s\n", dis_buf
);
9520 storeLE( mkexpr(addr
),
9523 "x86g_create_mxcsr", &x86g_create_mxcsr
,
9524 mkIRExprVec_1( get_sse_roundingmode() )
9527 goto decode_success
;
9530 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
9531 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x5C) {
9532 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "subps", Iop_Sub32Fx4
);
9533 goto decode_success
;
9536 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
9537 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x5C) {
9539 delta
= dis_SSE_E_to_G_lo32( sorb
, delta
+3, "subss", Iop_Sub32F0x4
);
9540 goto decode_success
;
9543 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
9544 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
9545 /* These just appear to be special cases of SHUFPS */
9546 if (sz
== 4 && insn
[0] == 0x0F && (insn
[1] == 0x15 || insn
[1] == 0x14)) {
9548 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
9549 Bool hi
= toBool(insn
[1] == 0x15);
9550 sV
= newTemp(Ity_V128
);
9551 dV
= newTemp(Ity_V128
);
9552 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
9554 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
9556 if (epartIsReg(modrm
)) {
9557 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
9559 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
9560 nameXMMReg(eregOfRM(modrm
)),
9561 nameXMMReg(gregOfRM(modrm
)));
9563 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9564 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
9566 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
9568 nameXMMReg(gregOfRM(modrm
)));
9571 breakup128to32s( dV
, &d3
, &d2
, &d1
, &d0
);
9572 breakup128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
9575 putXMMReg( gregOfRM(modrm
), mk128from32s( s3
, d3
, s2
, d2
) );
9577 putXMMReg( gregOfRM(modrm
), mk128from32s( s1
, d1
, s0
, d0
) );
9580 goto decode_success
;
9583 /* 0F 57 = XORPS -- G = G xor E */
9584 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x57) {
9585 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "xorps", Iop_XorV128
);
9586 goto decode_success
;
9589 /* ---------------------------------------------------- */
9590 /* --- end of the SSE decoder. --- */
9591 /* ---------------------------------------------------- */
9593 /* ---------------------------------------------------- */
9594 /* --- start of the SSE2 decoder. --- */
9595 /* ---------------------------------------------------- */
9597 /* Skip parts of the decoder which don't apply given the stated
9598 guest subarchitecture. */
9599 if (0 == (archinfo
->hwcaps
& VEX_HWCAPS_X86_SSE2
))
9600 goto after_sse_decoders
; /* no SSE2 capabilities */
9602 insn
= &guest_code
[delta
];
9604 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
9605 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x58) {
9606 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "addpd", Iop_Add64Fx2
);
9607 goto decode_success
;
9610 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
9611 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x58) {
9613 delta
= dis_SSE_E_to_G_lo64( sorb
, delta
+3, "addsd", Iop_Add64F0x2
);
9614 goto decode_success
;
9617 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
9618 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x55) {
9619 delta
= dis_SSE_E_to_G_all_invG( sorb
, delta
+2, "andnpd", Iop_AndV128
);
9620 goto decode_success
;
9623 /* 66 0F 54 = ANDPD -- G = G and E */
9624 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x54) {
9625 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "andpd", Iop_AndV128
);
9626 goto decode_success
;
9629 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
9630 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xC2) {
9631 delta
= dis_SSEcmp_E_to_G( sorb
, delta
+2, "cmppd", True
, 8 );
9632 goto decode_success
;
9635 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
9636 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0xC2) {
9638 delta
= dis_SSEcmp_E_to_G( sorb
, delta
+3, "cmpsd", False
, 8 );
9639 goto decode_success
;
9642 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
9643 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
9644 if (sz
== 2 && insn
[0] == 0x0F && (insn
[1] == 0x2F || insn
[1] == 0x2E)) {
9645 IRTemp argL
= newTemp(Ity_F64
);
9646 IRTemp argR
= newTemp(Ity_F64
);
9647 modrm
= getIByte(delta
+2);
9648 if (epartIsReg(modrm
)) {
9649 assign( argR
, getXMMRegLane64F( eregOfRM(modrm
), 0/*lowest lane*/ ) );
9651 DIP("[u]comisd %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9652 nameXMMReg(gregOfRM(modrm
)) );
9654 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9655 assign( argR
, loadLE(Ity_F64
, mkexpr(addr
)) );
9657 DIP("[u]comisd %s,%s\n", dis_buf
,
9658 nameXMMReg(gregOfRM(modrm
)) );
9660 assign( argL
, getXMMRegLane64F( gregOfRM(modrm
), 0/*lowest lane*/ ) );
9662 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
9663 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
9667 binop(Iop_CmpF64
, mkexpr(argL
), mkexpr(argR
)),
9670 /* Set NDEP even though it isn't used. This makes redundant-PUT
9671 elimination of previous stores to this field work better. */
9672 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
9673 goto decode_success
;
9676 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
9678 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0xE6) {
9679 IRTemp arg64
= newTemp(Ity_I64
);
9682 modrm
= getIByte(delta
+3);
9683 if (epartIsReg(modrm
)) {
9684 assign( arg64
, getXMMRegLane64(eregOfRM(modrm
), 0) );
9686 DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9687 nameXMMReg(gregOfRM(modrm
)));
9689 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
9690 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
9692 DIP("cvtdq2pd %s,%s\n", dis_buf
,
9693 nameXMMReg(gregOfRM(modrm
)) );
9698 unop(Iop_I32StoF64
, unop(Iop_64to32
, mkexpr(arg64
)))
9703 unop(Iop_I32StoF64
, unop(Iop_64HIto32
, mkexpr(arg64
)))
9706 goto decode_success
;
9709 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
9711 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x5B) {
9712 IRTemp argV
= newTemp(Ity_V128
);
9713 IRTemp rmode
= newTemp(Ity_I32
);
9715 modrm
= getIByte(delta
+2);
9716 if (epartIsReg(modrm
)) {
9717 assign( argV
, getXMMReg(eregOfRM(modrm
)) );
9719 DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9720 nameXMMReg(gregOfRM(modrm
)));
9722 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9723 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
9725 DIP("cvtdq2ps %s,%s\n", dis_buf
,
9726 nameXMMReg(gregOfRM(modrm
)) );
9729 assign( rmode
, get_sse_roundingmode() );
9730 breakup128to32s( argV
, &t3
, &t2
, &t1
, &t0
);
9732 # define CVT(_t) binop( Iop_F64toF32, \
9734 unop(Iop_I32StoF64,mkexpr(_t)))
9736 putXMMRegLane32F( gregOfRM(modrm
), 3, CVT(t3
) );
9737 putXMMRegLane32F( gregOfRM(modrm
), 2, CVT(t2
) );
9738 putXMMRegLane32F( gregOfRM(modrm
), 1, CVT(t1
) );
9739 putXMMRegLane32F( gregOfRM(modrm
), 0, CVT(t0
) );
9743 goto decode_success
;
9746 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
9747 lo half xmm(G), and zero upper half */
9748 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0xE6) {
9749 IRTemp argV
= newTemp(Ity_V128
);
9750 IRTemp rmode
= newTemp(Ity_I32
);
9753 modrm
= getIByte(delta
+3);
9754 if (epartIsReg(modrm
)) {
9755 assign( argV
, getXMMReg(eregOfRM(modrm
)) );
9757 DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9758 nameXMMReg(gregOfRM(modrm
)));
9760 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
9761 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
9763 DIP("cvtpd2dq %s,%s\n", dis_buf
,
9764 nameXMMReg(gregOfRM(modrm
)) );
9767 assign( rmode
, get_sse_roundingmode() );
9768 t0
= newTemp(Ity_F64
);
9769 t1
= newTemp(Ity_F64
);
9770 assign( t0
, unop(Iop_ReinterpI64asF64
,
9771 unop(Iop_V128to64
, mkexpr(argV
))) );
9772 assign( t1
, unop(Iop_ReinterpI64asF64
,
9773 unop(Iop_V128HIto64
, mkexpr(argV
))) );
9775 # define CVT(_t) binop( Iop_F64toI32S, \
9779 putXMMRegLane32( gregOfRM(modrm
), 3, mkU32(0) );
9780 putXMMRegLane32( gregOfRM(modrm
), 2, mkU32(0) );
9781 putXMMRegLane32( gregOfRM(modrm
), 1, CVT(t1
) );
9782 putXMMRegLane32( gregOfRM(modrm
), 0, CVT(t0
) );
9786 goto decode_success
;
9789 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
9790 I32 in mmx, according to prevailing SSE rounding mode */
9791 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
9792 I32 in mmx, rounding towards zero */
9793 if (sz
== 2 && insn
[0] == 0x0F && (insn
[1] == 0x2D || insn
[1] == 0x2C)) {
9794 IRTemp dst64
= newTemp(Ity_I64
);
9795 IRTemp rmode
= newTemp(Ity_I32
);
9796 IRTemp f64lo
= newTemp(Ity_F64
);
9797 IRTemp f64hi
= newTemp(Ity_F64
);
9798 Bool r2zero
= toBool(insn
[1] == 0x2C);
9801 modrm
= getIByte(delta
+2);
9803 if (epartIsReg(modrm
)) {
9805 assign(f64lo
, getXMMRegLane64F(eregOfRM(modrm
), 0));
9806 assign(f64hi
, getXMMRegLane64F(eregOfRM(modrm
), 1));
9807 DIP("cvt%spd2pi %s,%s\n", r2zero
? "t" : "",
9808 nameXMMReg(eregOfRM(modrm
)),
9809 nameMMXReg(gregOfRM(modrm
)));
9811 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9812 assign(f64lo
, loadLE(Ity_F64
, mkexpr(addr
)));
9813 assign(f64hi
, loadLE(Ity_F64
, binop( Iop_Add32
,
9817 DIP("cvt%spf2pi %s,%s\n", r2zero
? "t" : "",
9819 nameMMXReg(gregOfRM(modrm
)));
9823 assign(rmode
, mkU32((UInt
)Irrm_ZERO
) );
9825 assign( rmode
, get_sse_roundingmode() );
9830 binop( Iop_32HLto64
,
9831 binop( Iop_F64toI32S
, mkexpr(rmode
), mkexpr(f64hi
) ),
9832 binop( Iop_F64toI32S
, mkexpr(rmode
), mkexpr(f64lo
) )
9836 putMMXReg(gregOfRM(modrm
), mkexpr(dst64
));
9837 goto decode_success
;
9840 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
9841 lo half xmm(G), and zero upper half */
9842 /* Note, this is practically identical to CVTPD2DQ. It would have
9843 been nicer to merge them together, but the insn[] offsets differ
9845 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x5A) {
9846 IRTemp argV
= newTemp(Ity_V128
);
9847 IRTemp rmode
= newTemp(Ity_I32
);
9849 modrm
= getIByte(delta
+2);
9850 if (epartIsReg(modrm
)) {
9851 assign( argV
, getXMMReg(eregOfRM(modrm
)) );
9853 DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9854 nameXMMReg(gregOfRM(modrm
)));
9856 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9857 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
9859 DIP("cvtpd2ps %s,%s\n", dis_buf
,
9860 nameXMMReg(gregOfRM(modrm
)) );
9863 assign( rmode
, get_sse_roundingmode() );
9864 t0
= newTemp(Ity_F64
);
9865 t1
= newTemp(Ity_F64
);
9866 assign( t0
, unop(Iop_ReinterpI64asF64
,
9867 unop(Iop_V128to64
, mkexpr(argV
))) );
9868 assign( t1
, unop(Iop_ReinterpI64asF64
,
9869 unop(Iop_V128HIto64
, mkexpr(argV
))) );
9871 # define CVT(_t) binop( Iop_F64toF32, \
9875 putXMMRegLane32( gregOfRM(modrm
), 3, mkU32(0) );
9876 putXMMRegLane32( gregOfRM(modrm
), 2, mkU32(0) );
9877 putXMMRegLane32F( gregOfRM(modrm
), 1, CVT(t1
) );
9878 putXMMRegLane32F( gregOfRM(modrm
), 0, CVT(t0
) );
9882 goto decode_success
;
9885 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
9887 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x2A) {
9888 IRTemp arg64
= newTemp(Ity_I64
);
9890 modrm
= getIByte(delta
+2);
9891 if (epartIsReg(modrm
)) {
9892 /* Only switch to MMX mode if the source is a MMX register.
9893 This is inconsistent with all other instructions which
9894 convert between XMM and (M64 or MMX), which always switch
9895 to MMX mode even if 64-bit operand is M64 and not MMX. At
9896 least, that's what the Intel docs seem to me to say.
9899 assign( arg64
, getMMXReg(eregOfRM(modrm
)) );
9901 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
9902 nameXMMReg(gregOfRM(modrm
)));
9904 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9905 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
9907 DIP("cvtpi2pd %s,%s\n", dis_buf
,
9908 nameXMMReg(gregOfRM(modrm
)) );
9913 unop(Iop_I32StoF64
, unop(Iop_64to32
, mkexpr(arg64
)) )
9918 unop(Iop_I32StoF64
, unop(Iop_64HIto32
, mkexpr(arg64
)) )
9921 goto decode_success
;
9924 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
9926 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x5B) {
9927 IRTemp argV
= newTemp(Ity_V128
);
9928 IRTemp rmode
= newTemp(Ity_I32
);
9930 modrm
= getIByte(delta
+2);
9931 if (epartIsReg(modrm
)) {
9932 assign( argV
, getXMMReg(eregOfRM(modrm
)) );
9934 DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9935 nameXMMReg(gregOfRM(modrm
)));
9937 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9938 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
9940 DIP("cvtps2dq %s,%s\n", dis_buf
,
9941 nameXMMReg(gregOfRM(modrm
)) );
9944 assign( rmode
, get_sse_roundingmode() );
9945 breakup128to32s( argV
, &t3
, &t2
, &t1
, &t0
);
9947 /* This is less than ideal. If it turns out to be a performance
9948 bottleneck it can be improved. */
9950 binop( Iop_F64toI32S, \
9952 unop( Iop_F32toF64, \
9953 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
9955 putXMMRegLane32( gregOfRM(modrm
), 3, CVT(t3
) );
9956 putXMMRegLane32( gregOfRM(modrm
), 2, CVT(t2
) );
9957 putXMMRegLane32( gregOfRM(modrm
), 1, CVT(t1
) );
9958 putXMMRegLane32( gregOfRM(modrm
), 0, CVT(t0
) );
9962 goto decode_success
;
9965 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
9967 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x5A) {
9968 IRTemp f32lo
= newTemp(Ity_F32
);
9969 IRTemp f32hi
= newTemp(Ity_F32
);
9971 modrm
= getIByte(delta
+2);
9972 if (epartIsReg(modrm
)) {
9973 assign( f32lo
, getXMMRegLane32F(eregOfRM(modrm
), 0) );
9974 assign( f32hi
, getXMMRegLane32F(eregOfRM(modrm
), 1) );
9976 DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9977 nameXMMReg(gregOfRM(modrm
)));
9979 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9980 assign( f32lo
, loadLE(Ity_F32
, mkexpr(addr
)) );
9981 assign( f32hi
, loadLE(Ity_F32
,
9982 binop(Iop_Add32
,mkexpr(addr
),mkU32(4))) );
9984 DIP("cvtps2pd %s,%s\n", dis_buf
,
9985 nameXMMReg(gregOfRM(modrm
)) );
9988 putXMMRegLane64F( gregOfRM(modrm
), 1,
9989 unop(Iop_F32toF64
, mkexpr(f32hi
)) );
9990 putXMMRegLane64F( gregOfRM(modrm
), 0,
9991 unop(Iop_F32toF64
, mkexpr(f32lo
)) );
9993 goto decode_success
;
9996 /* F2 0F 2D = CVTSD2SI -- convert F64 in mem/low half xmm to
9997 I32 in ireg, according to prevailing SSE rounding mode */
9998 /* F2 0F 2C = CVTTSD2SI -- convert F64 in mem/low half xmm to
9999 I32 in ireg, rounding towards zero */
10000 if (insn
[0] == 0xF2 && insn
[1] == 0x0F
10001 && (insn
[2] == 0x2D || insn
[2] == 0x2C)) {
10002 IRTemp rmode
= newTemp(Ity_I32
);
10003 IRTemp f64lo
= newTemp(Ity_F64
);
10004 Bool r2zero
= toBool(insn
[2] == 0x2C);
10007 modrm
= getIByte(delta
+3);
10008 if (epartIsReg(modrm
)) {
10010 assign(f64lo
, getXMMRegLane64F(eregOfRM(modrm
), 0));
10011 DIP("cvt%ssd2si %s,%s\n", r2zero
? "t" : "",
10012 nameXMMReg(eregOfRM(modrm
)),
10013 nameIReg(4, gregOfRM(modrm
)));
10015 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10016 assign(f64lo
, loadLE(Ity_F64
, mkexpr(addr
)));
10018 DIP("cvt%ssd2si %s,%s\n", r2zero
? "t" : "",
10020 nameIReg(4, gregOfRM(modrm
)));
10024 assign( rmode
, mkU32((UInt
)Irrm_ZERO
) );
10026 assign( rmode
, get_sse_roundingmode() );
10029 putIReg(4, gregOfRM(modrm
),
10030 binop( Iop_F64toI32S
, mkexpr(rmode
), mkexpr(f64lo
)) );
10032 goto decode_success
;
10035 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
10036 low 1/4 xmm(G), according to prevailing SSE rounding mode */
10037 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x5A) {
10038 IRTemp rmode
= newTemp(Ity_I32
);
10039 IRTemp f64lo
= newTemp(Ity_F64
);
10042 modrm
= getIByte(delta
+3);
10043 if (epartIsReg(modrm
)) {
10045 assign(f64lo
, getXMMRegLane64F(eregOfRM(modrm
), 0));
10046 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
10047 nameXMMReg(gregOfRM(modrm
)));
10049 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10050 assign(f64lo
, loadLE(Ity_F64
, mkexpr(addr
)));
10052 DIP("cvtsd2ss %s,%s\n", dis_buf
,
10053 nameXMMReg(gregOfRM(modrm
)));
10056 assign( rmode
, get_sse_roundingmode() );
10058 gregOfRM(modrm
), 0,
10059 binop( Iop_F64toF32
, mkexpr(rmode
), mkexpr(f64lo
) )
10062 goto decode_success
;
10065 /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low
10067 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x2A) {
10068 IRTemp arg32
= newTemp(Ity_I32
);
10071 modrm
= getIByte(delta
+3);
10072 if (epartIsReg(modrm
)) {
10073 assign( arg32
, getIReg(4, eregOfRM(modrm
)) );
10075 DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm
)),
10076 nameXMMReg(gregOfRM(modrm
)));
10078 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10079 assign( arg32
, loadLE(Ity_I32
, mkexpr(addr
)) );
10081 DIP("cvtsi2sd %s,%s\n", dis_buf
,
10082 nameXMMReg(gregOfRM(modrm
)) );
10086 gregOfRM(modrm
), 0,
10087 unop(Iop_I32StoF64
, mkexpr(arg32
)) );
10089 goto decode_success
;
10092 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
10094 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x5A) {
10095 IRTemp f32lo
= newTemp(Ity_F32
);
10098 modrm
= getIByte(delta
+3);
10099 if (epartIsReg(modrm
)) {
10101 assign(f32lo
, getXMMRegLane32F(eregOfRM(modrm
), 0));
10102 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
10103 nameXMMReg(gregOfRM(modrm
)));
10105 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10106 assign(f32lo
, loadLE(Ity_F32
, mkexpr(addr
)));
10108 DIP("cvtss2sd %s,%s\n", dis_buf
,
10109 nameXMMReg(gregOfRM(modrm
)));
10112 putXMMRegLane64F( gregOfRM(modrm
), 0,
10113 unop( Iop_F32toF64
, mkexpr(f32lo
) ) );
10115 goto decode_success
;
10118 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
10119 lo half xmm(G), and zero upper half, rounding towards zero */
10120 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE6) {
10121 IRTemp argV
= newTemp(Ity_V128
);
10122 IRTemp rmode
= newTemp(Ity_I32
);
10124 modrm
= getIByte(delta
+2);
10125 if (epartIsReg(modrm
)) {
10126 assign( argV
, getXMMReg(eregOfRM(modrm
)) );
10128 DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
10129 nameXMMReg(gregOfRM(modrm
)));
10131 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10132 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10134 DIP("cvttpd2dq %s,%s\n", dis_buf
,
10135 nameXMMReg(gregOfRM(modrm
)) );
10138 assign( rmode
, mkU32((UInt
)Irrm_ZERO
) );
10140 t0
= newTemp(Ity_F64
);
10141 t1
= newTemp(Ity_F64
);
10142 assign( t0
, unop(Iop_ReinterpI64asF64
,
10143 unop(Iop_V128to64
, mkexpr(argV
))) );
10144 assign( t1
, unop(Iop_ReinterpI64asF64
,
10145 unop(Iop_V128HIto64
, mkexpr(argV
))) );
10147 # define CVT(_t) binop( Iop_F64toI32S, \
10151 putXMMRegLane32( gregOfRM(modrm
), 3, mkU32(0) );
10152 putXMMRegLane32( gregOfRM(modrm
), 2, mkU32(0) );
10153 putXMMRegLane32( gregOfRM(modrm
), 1, CVT(t1
) );
10154 putXMMRegLane32( gregOfRM(modrm
), 0, CVT(t0
) );
10158 goto decode_success
;
10161 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
10162 xmm(G), rounding towards zero */
10163 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x5B) {
10164 IRTemp argV
= newTemp(Ity_V128
);
10165 IRTemp rmode
= newTemp(Ity_I32
);
10168 modrm
= getIByte(delta
+3);
10169 if (epartIsReg(modrm
)) {
10170 assign( argV
, getXMMReg(eregOfRM(modrm
)) );
10172 DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
10173 nameXMMReg(gregOfRM(modrm
)));
10175 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10176 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10178 DIP("cvttps2dq %s,%s\n", dis_buf
,
10179 nameXMMReg(gregOfRM(modrm
)) );
10182 assign( rmode
, mkU32((UInt
)Irrm_ZERO
) );
10183 breakup128to32s( argV
, &t3
, &t2
, &t1
, &t0
);
10185 /* This is less than ideal. If it turns out to be a performance
10186 bottleneck it can be improved. */
10188 binop( Iop_F64toI32S, \
10190 unop( Iop_F32toF64, \
10191 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10193 putXMMRegLane32( gregOfRM(modrm
), 3, CVT(t3
) );
10194 putXMMRegLane32( gregOfRM(modrm
), 2, CVT(t2
) );
10195 putXMMRegLane32( gregOfRM(modrm
), 1, CVT(t1
) );
10196 putXMMRegLane32( gregOfRM(modrm
), 0, CVT(t0
) );
10200 goto decode_success
;
10203 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
10204 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x5E) {
10205 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "divpd", Iop_Div64Fx2
);
10206 goto decode_success
;
10209 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
10210 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x5E) {
10212 delta
= dis_SSE_E_to_G_lo64( sorb
, delta
+3, "divsd", Iop_Div64F0x2
);
10213 goto decode_success
;
10216 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
10217 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
10218 if (insn
[0] == 0x0F && insn
[1] == 0xAE
10219 && epartIsReg(insn
[2])
10220 && (gregOfRM(insn
[2]) == 5 || gregOfRM(insn
[2]) == 6)) {
10223 /* Insert a memory fence. It's sometimes important that these
10224 are carried through to the generated code. */
10225 stmt( IRStmt_MBE(Imbe_Fence
) );
10226 DIP("%sfence\n", gregOfRM(insn
[2])==5 ? "l" : "m");
10227 goto decode_success
;
10230 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
10231 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x5F) {
10232 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "maxpd", Iop_Max64Fx2
);
10233 goto decode_success
;
10236 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
10237 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x5F) {
10239 delta
= dis_SSE_E_to_G_lo64( sorb
, delta
+3, "maxsd", Iop_Max64F0x2
);
10240 goto decode_success
;
10243 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
10244 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x5D) {
10245 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "minpd", Iop_Min64Fx2
);
10246 goto decode_success
;
10249 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
10250 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x5D) {
10252 delta
= dis_SSE_E_to_G_lo64( sorb
, delta
+3, "minsd", Iop_Min64F0x2
);
10253 goto decode_success
;
10256 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
10257 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
10258 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
10259 if (sz
== 2 && insn
[0] == 0x0F
10260 && (insn
[1] == 0x28 || insn
[1] == 0x10 || insn
[1] == 0x6F)) {
10261 const HChar
* wot
= insn
[1]==0x28 ? "apd" :
10262 insn
[1]==0x10 ? "upd" : "dqa";
10263 modrm
= getIByte(delta
+2);
10264 if (epartIsReg(modrm
)) {
10265 putXMMReg( gregOfRM(modrm
),
10266 getXMMReg( eregOfRM(modrm
) ));
10267 DIP("mov%s %s,%s\n", wot
, nameXMMReg(eregOfRM(modrm
)),
10268 nameXMMReg(gregOfRM(modrm
)));
10271 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10272 if (insn
[1] == 0x28/*movapd*/ || insn
[1] == 0x6F/*movdqa*/)
10273 gen_SEGV_if_not_16_aligned( addr
);
10274 putXMMReg( gregOfRM(modrm
),
10275 loadLE(Ity_V128
, mkexpr(addr
)) );
10276 DIP("mov%s %s,%s\n", wot
, dis_buf
,
10277 nameXMMReg(gregOfRM(modrm
)));
10280 goto decode_success
;
10283 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
10284 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
10285 if (sz
== 2 && insn
[0] == 0x0F
10286 && (insn
[1] == 0x29 || insn
[1] == 0x11)) {
10287 const HChar
* wot
= insn
[1]==0x29 ? "apd" : "upd";
10288 modrm
= getIByte(delta
+2);
10289 if (epartIsReg(modrm
)) {
10290 /* fall through; awaiting test case */
10292 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10293 if (insn
[1] == 0x29/*movapd*/)
10294 gen_SEGV_if_not_16_aligned( addr
);
10295 storeLE( mkexpr(addr
), getXMMReg(gregOfRM(modrm
)) );
10296 DIP("mov%s %s,%s\n", wot
, nameXMMReg(gregOfRM(modrm
)),
10299 goto decode_success
;
10303 /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */
10304 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x6E) {
10305 modrm
= getIByte(delta
+2);
10306 if (epartIsReg(modrm
)) {
10310 unop( Iop_32UtoV128
, getIReg(4, eregOfRM(modrm
)) )
10312 DIP("movd %s, %s\n",
10313 nameIReg(4,eregOfRM(modrm
)), nameXMMReg(gregOfRM(modrm
)));
10315 addr
= disAMode( &alen
, sorb
, delta
+2, dis_buf
);
10319 unop( Iop_32UtoV128
,loadLE(Ity_I32
, mkexpr(addr
)) )
10321 DIP("movd %s, %s\n", dis_buf
, nameXMMReg(gregOfRM(modrm
)));
10323 goto decode_success
;
10326 /* 66 0F 7E = MOVD from xmm low 1/4 to r/m32. */
10327 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x7E) {
10328 modrm
= getIByte(delta
+2);
10329 if (epartIsReg(modrm
)) {
10331 putIReg( 4, eregOfRM(modrm
),
10332 getXMMRegLane32(gregOfRM(modrm
), 0) );
10333 DIP("movd %s, %s\n",
10334 nameXMMReg(gregOfRM(modrm
)), nameIReg(4,eregOfRM(modrm
)));
10336 addr
= disAMode( &alen
, sorb
, delta
+2, dis_buf
);
10338 storeLE( mkexpr(addr
),
10339 getXMMRegLane32(gregOfRM(modrm
), 0) );
10340 DIP("movd %s, %s\n", nameXMMReg(gregOfRM(modrm
)), dis_buf
);
10342 goto decode_success
;
10345 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
10346 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x7F) {
10347 modrm
= getIByte(delta
+2);
10348 if (epartIsReg(modrm
)) {
10350 putXMMReg( eregOfRM(modrm
),
10351 getXMMReg(gregOfRM(modrm
)) );
10352 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm
)),
10353 nameXMMReg(eregOfRM(modrm
)));
10355 addr
= disAMode( &alen
, sorb
, delta
+2, dis_buf
);
10357 gen_SEGV_if_not_16_aligned( addr
);
10358 storeLE( mkexpr(addr
), getXMMReg(gregOfRM(modrm
)) );
10359 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm
)), dis_buf
);
10361 goto decode_success
;
10364 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
10365 /* Unfortunately can't simply use the MOVDQA case since the
10366 prefix lengths are different (66 vs F3) */
10367 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x6F) {
10369 modrm
= getIByte(delta
+3);
10370 if (epartIsReg(modrm
)) {
10371 putXMMReg( gregOfRM(modrm
),
10372 getXMMReg( eregOfRM(modrm
) ));
10373 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
10374 nameXMMReg(gregOfRM(modrm
)));
10377 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10378 putXMMReg( gregOfRM(modrm
),
10379 loadLE(Ity_V128
, mkexpr(addr
)) );
10380 DIP("movdqu %s,%s\n", dis_buf
,
10381 nameXMMReg(gregOfRM(modrm
)));
10384 goto decode_success
;
10387 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
10388 /* Unfortunately can't simply use the MOVDQA case since the
10389 prefix lengths are different (66 vs F3) */
10390 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x7F) {
10392 modrm
= getIByte(delta
+3);
10393 if (epartIsReg(modrm
)) {
10395 putXMMReg( eregOfRM(modrm
),
10396 getXMMReg(gregOfRM(modrm
)) );
10397 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm
)),
10398 nameXMMReg(eregOfRM(modrm
)));
10400 addr
= disAMode( &alen
, sorb
, delta
+3, dis_buf
);
10402 storeLE( mkexpr(addr
), getXMMReg(gregOfRM(modrm
)) );
10403 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm
)), dis_buf
);
10405 goto decode_success
;
10408 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
10409 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0xD6) {
10411 modrm
= getIByte(delta
+3);
10412 if (epartIsReg(modrm
)) {
10414 putMMXReg( gregOfRM(modrm
),
10415 getXMMRegLane64( eregOfRM(modrm
), 0 ));
10416 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
10417 nameMMXReg(gregOfRM(modrm
)));
10419 goto decode_success
;
10421 /* fall through, apparently no mem case for this insn */
10425 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
10426 /* These seems identical to MOVHPS. This instruction encoding is
10427 completely crazy. */
10428 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x16) {
10429 modrm
= getIByte(delta
+2);
10430 if (epartIsReg(modrm
)) {
10431 /* fall through; apparently reg-reg is not possible */
10433 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10435 putXMMRegLane64( gregOfRM(modrm
), 1/*upper lane*/,
10436 loadLE(Ity_I64
, mkexpr(addr
)) );
10437 DIP("movhpd %s,%s\n", dis_buf
,
10438 nameXMMReg( gregOfRM(modrm
) ));
10439 goto decode_success
;
10443 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
10444 /* Again, this seems identical to MOVHPS. */
10445 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x17) {
10446 if (!epartIsReg(insn
[2])) {
10448 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
10450 storeLE( mkexpr(addr
),
10451 getXMMRegLane64( gregOfRM(insn
[2]),
10452 1/*upper lane*/ ) );
10453 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn
[2]) ),
10455 goto decode_success
;
10457 /* else fall through */
10460 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
10461 /* Identical to MOVLPS ? */
10462 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x12) {
10463 modrm
= getIByte(delta
+2);
10464 if (epartIsReg(modrm
)) {
10465 /* fall through; apparently reg-reg is not possible */
10467 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10469 putXMMRegLane64( gregOfRM(modrm
), 0/*lower lane*/,
10470 loadLE(Ity_I64
, mkexpr(addr
)) );
10471 DIP("movlpd %s, %s\n",
10472 dis_buf
, nameXMMReg( gregOfRM(modrm
) ));
10473 goto decode_success
;
10477 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
10478 /* Identical to MOVLPS ? */
10479 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x13) {
10480 if (!epartIsReg(insn
[2])) {
10482 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
10484 storeLE( mkexpr(addr
),
10485 getXMMRegLane64( gregOfRM(insn
[2]),
10486 0/*lower lane*/ ) );
10487 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn
[2]) ),
10489 goto decode_success
;
10491 /* else fall through */
10494 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
10495 2 lowest bits of ireg(G) */
10496 if (insn
[0] == 0x0F && insn
[1] == 0x50) {
10497 modrm
= getIByte(delta
+2);
10498 if (sz
== 2 && epartIsReg(modrm
)) {
10500 t0
= newTemp(Ity_I32
);
10501 t1
= newTemp(Ity_I32
);
10503 src
= eregOfRM(modrm
);
10504 assign( t0
, binop( Iop_And32
,
10505 binop(Iop_Shr32
, getXMMRegLane32(src
,1), mkU8(31)),
10507 assign( t1
, binop( Iop_And32
,
10508 binop(Iop_Shr32
, getXMMRegLane32(src
,3), mkU8(30)),
10510 putIReg(4, gregOfRM(modrm
),
10511 binop(Iop_Or32
, mkexpr(t0
), mkexpr(t1
))
10513 DIP("movmskpd %s,%s\n", nameXMMReg(src
),
10514 nameIReg(4, gregOfRM(modrm
)));
10515 goto decode_success
;
10517 /* else fall through */
10520 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
10521 if (insn
[0] == 0x0F && insn
[1] == 0xF7) {
10522 modrm
= getIByte(delta
+2);
10523 if (sz
== 2 && epartIsReg(modrm
)) {
10524 IRTemp regD
= newTemp(Ity_V128
);
10525 IRTemp mask
= newTemp(Ity_V128
);
10526 IRTemp olddata
= newTemp(Ity_V128
);
10527 IRTemp newdata
= newTemp(Ity_V128
);
10528 addr
= newTemp(Ity_I32
);
10530 assign( addr
, handleSegOverride( sorb
, getIReg(4, R_EDI
) ));
10531 assign( regD
, getXMMReg( gregOfRM(modrm
) ));
10533 /* Unfortunately can't do the obvious thing with SarN8x16
10534 here since that can't be re-emitted as SSE2 code - no such
10538 binop(Iop_64HLtoV128
,
10540 getXMMRegLane64( eregOfRM(modrm
), 1 ),
10543 getXMMRegLane64( eregOfRM(modrm
), 0 ),
10545 assign( olddata
, loadLE( Ity_V128
, mkexpr(addr
) ));
10553 unop(Iop_NotV128
, mkexpr(mask
)))) );
10554 storeLE( mkexpr(addr
), mkexpr(newdata
) );
10557 DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm
) ),
10558 nameXMMReg( gregOfRM(modrm
) ) );
10559 goto decode_success
;
10561 /* else fall through */
10564 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
10565 if (insn
[0] == 0x0F && insn
[1] == 0xE7) {
10566 modrm
= getIByte(delta
+2);
10567 if (sz
== 2 && !epartIsReg(modrm
)) {
10568 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10569 gen_SEGV_if_not_16_aligned( addr
);
10570 storeLE( mkexpr(addr
), getXMMReg(gregOfRM(modrm
)) );
10571 DIP("movntdq %s,%s\n", dis_buf
,
10572 nameXMMReg(gregOfRM(modrm
)));
10574 goto decode_success
;
10576 /* else fall through */
10579 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
10580 if (insn
[0] == 0x0F && insn
[1] == 0xC3) {
10582 modrm
= getIByte(delta
+2);
10583 if (!epartIsReg(modrm
)) {
10584 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10585 storeLE( mkexpr(addr
), getIReg(4, gregOfRM(modrm
)) );
10586 DIP("movnti %s,%s\n", dis_buf
,
10587 nameIReg(4, gregOfRM(modrm
)));
10589 goto decode_success
;
10591 /* else fall through */
10594 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
10595 or lo half xmm). */
10596 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD6) {
10597 modrm
= getIByte(delta
+2);
10598 if (epartIsReg(modrm
)) {
10599 /* fall through, awaiting test case */
10600 /* dst: lo half copied, hi half zeroed */
10602 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10603 storeLE( mkexpr(addr
),
10604 getXMMRegLane64( gregOfRM(modrm
), 0 ));
10605 DIP("movq %s,%s\n", nameXMMReg(gregOfRM(modrm
)), dis_buf
);
10607 goto decode_success
;
10611 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
10613 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0xD6) {
10615 modrm
= getIByte(delta
+3);
10616 if (epartIsReg(modrm
)) {
10618 putXMMReg( gregOfRM(modrm
),
10619 unop(Iop_64UtoV128
, getMMXReg( eregOfRM(modrm
) )) );
10620 DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
10621 nameXMMReg(gregOfRM(modrm
)));
10623 goto decode_success
;
10625 /* fall through, apparently no mem case for this insn */
10629 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
10630 G (lo half xmm). Upper half of G is zeroed out. */
10631 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
10632 G (lo half xmm). If E is mem, upper half of G is zeroed out.
10633 If E is reg, upper half of G is unchanged. */
10634 if ((insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x10)
10635 || (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x7E)) {
10637 modrm
= getIByte(delta
+3);
10638 if (epartIsReg(modrm
)) {
10639 putXMMRegLane64( gregOfRM(modrm
), 0,
10640 getXMMRegLane64( eregOfRM(modrm
), 0 ));
10641 if (insn
[0] == 0xF3/*MOVQ*/) {
10642 /* zero bits 127:64 */
10643 putXMMRegLane64( gregOfRM(modrm
), 1, mkU64(0) );
10645 DIP("movsd %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
10646 nameXMMReg(gregOfRM(modrm
)));
10649 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10650 /* zero bits 127:64 */
10651 putXMMRegLane64( gregOfRM(modrm
), 1, mkU64(0) );
10652 /* write bits 63:0 */
10653 putXMMRegLane64( gregOfRM(modrm
), 0,
10654 loadLE(Ity_I64
, mkexpr(addr
)) );
10655 DIP("movsd %s,%s\n", dis_buf
,
10656 nameXMMReg(gregOfRM(modrm
)));
10659 goto decode_success
;
10662 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
10663 or lo half xmm). */
10664 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x11) {
10666 modrm
= getIByte(delta
+3);
10667 if (epartIsReg(modrm
)) {
10668 putXMMRegLane64( eregOfRM(modrm
), 0,
10669 getXMMRegLane64( gregOfRM(modrm
), 0 ));
10670 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm
)),
10671 nameXMMReg(eregOfRM(modrm
)));
10674 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10675 storeLE( mkexpr(addr
),
10676 getXMMRegLane64(gregOfRM(modrm
), 0) );
10677 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm
)),
10681 goto decode_success
;
10684 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
10685 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x59) {
10686 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "mulpd", Iop_Mul64Fx2
);
10687 goto decode_success
;
10690 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
10691 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x59) {
10693 delta
= dis_SSE_E_to_G_lo64( sorb
, delta
+3, "mulsd", Iop_Mul64F0x2
);
10694 goto decode_success
;
10697 /* 66 0F 56 = ORPD -- G = G and E */
10698 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x56) {
10699 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "orpd", Iop_OrV128
);
10700 goto decode_success
;
10703 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
10704 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xC6) {
10706 IRTemp sV
= newTemp(Ity_V128
);
10707 IRTemp dV
= newTemp(Ity_V128
);
10708 IRTemp s1
= newTemp(Ity_I64
);
10709 IRTemp s0
= newTemp(Ity_I64
);
10710 IRTemp d1
= newTemp(Ity_I64
);
10711 IRTemp d0
= newTemp(Ity_I64
);
10714 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
10716 if (epartIsReg(modrm
)) {
10717 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
10718 select
= (Int
)insn
[3];
10720 DIP("shufpd $%d,%s,%s\n", select
,
10721 nameXMMReg(eregOfRM(modrm
)),
10722 nameXMMReg(gregOfRM(modrm
)));
10724 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10725 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10726 select
= (Int
)insn
[2+alen
];
10728 DIP("shufpd $%d,%s,%s\n", select
,
10730 nameXMMReg(gregOfRM(modrm
)));
10733 assign( d1
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
10734 assign( d0
, unop(Iop_V128to64
, mkexpr(dV
)) );
10735 assign( s1
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
10736 assign( s0
, unop(Iop_V128to64
, mkexpr(sV
)) );
10738 # define SELD(n) mkexpr((n)==0 ? d0 : d1)
10739 # define SELS(n) mkexpr((n)==0 ? s0 : s1)
10743 binop(Iop_64HLtoV128
, SELS((select
>>1)&1), SELD((select
>>0)&1) )
10749 goto decode_success
;
10752 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
10753 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x51) {
10754 delta
= dis_SSE_E_to_G_unary_all( sorb
, delta
+2,
10755 "sqrtpd", Iop_Sqrt64Fx2
);
10756 goto decode_success
;
10759 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
10760 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x51) {
10762 delta
= dis_SSE_E_to_G_unary_lo64( sorb
, delta
+3,
10763 "sqrtsd", Iop_Sqrt64F0x2
);
10764 goto decode_success
;
10767 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
10768 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x5C) {
10769 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "subpd", Iop_Sub64Fx2
);
10770 goto decode_success
;
10773 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
10774 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x5C) {
10776 delta
= dis_SSE_E_to_G_lo64( sorb
, delta
+3, "subsd", Iop_Sub64F0x2
);
10777 goto decode_success
;
10780 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
10781 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
10782 /* These just appear to be special cases of SHUFPS */
10783 if (sz
== 2 && insn
[0] == 0x0F && (insn
[1] == 0x15 || insn
[1] == 0x14)) {
10784 IRTemp s1
= newTemp(Ity_I64
);
10785 IRTemp s0
= newTemp(Ity_I64
);
10786 IRTemp d1
= newTemp(Ity_I64
);
10787 IRTemp d0
= newTemp(Ity_I64
);
10788 IRTemp sV
= newTemp(Ity_V128
);
10789 IRTemp dV
= newTemp(Ity_V128
);
10790 Bool hi
= toBool(insn
[1] == 0x15);
10793 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
10795 if (epartIsReg(modrm
)) {
10796 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
10798 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
10799 nameXMMReg(eregOfRM(modrm
)),
10800 nameXMMReg(gregOfRM(modrm
)));
10802 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10803 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10805 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
10807 nameXMMReg(gregOfRM(modrm
)));
10810 assign( d1
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
10811 assign( d0
, unop(Iop_V128to64
, mkexpr(dV
)) );
10812 assign( s1
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
10813 assign( s0
, unop(Iop_V128to64
, mkexpr(sV
)) );
10816 putXMMReg( gregOfRM(modrm
),
10817 binop(Iop_64HLtoV128
, mkexpr(s1
), mkexpr(d1
)) );
10819 putXMMReg( gregOfRM(modrm
),
10820 binop(Iop_64HLtoV128
, mkexpr(s0
), mkexpr(d0
)) );
10823 goto decode_success
;
10826 /* 66 0F 57 = XORPD -- G = G and E */
10827 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x57) {
10828 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "xorpd", Iop_XorV128
);
10829 goto decode_success
;
10832 /* 66 0F 6B = PACKSSDW */
10833 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x6B) {
10834 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10836 Iop_QNarrowBin32Sto16Sx8
, True
);
10837 goto decode_success
;
10840 /* 66 0F 63 = PACKSSWB */
10841 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x63) {
10842 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10844 Iop_QNarrowBin16Sto8Sx16
, True
);
10845 goto decode_success
;
10848 /* 66 0F 67 = PACKUSWB */
10849 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x67) {
10850 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10852 Iop_QNarrowBin16Sto8Ux16
, True
);
10853 goto decode_success
;
10856 /* 66 0F FC = PADDB */
10857 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xFC) {
10858 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10859 "paddb", Iop_Add8x16
, False
);
10860 goto decode_success
;
10863 /* 66 0F FE = PADDD */
10864 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xFE) {
10865 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10866 "paddd", Iop_Add32x4
, False
);
10867 goto decode_success
;
10870 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
10871 /* 0F D4 = PADDQ -- add 64x1 */
10872 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xD4) {
10874 delta
= dis_MMXop_regmem_to_reg (
10875 sorb
, delta
+2, insn
[1], "paddq", False
);
10876 goto decode_success
;
10879 /* 66 0F D4 = PADDQ */
10880 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD4) {
10881 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10882 "paddq", Iop_Add64x2
, False
);
10883 goto decode_success
;
10886 /* 66 0F FD = PADDW */
10887 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xFD) {
10888 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10889 "paddw", Iop_Add16x8
, False
);
10890 goto decode_success
;
10893 /* 66 0F EC = PADDSB */
10894 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xEC) {
10895 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10896 "paddsb", Iop_QAdd8Sx16
, False
);
10897 goto decode_success
;
10900 /* 66 0F ED = PADDSW */
10901 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xED) {
10902 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10903 "paddsw", Iop_QAdd16Sx8
, False
);
10904 goto decode_success
;
10907 /* 66 0F DC = PADDUSB */
10908 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xDC) {
10909 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10910 "paddusb", Iop_QAdd8Ux16
, False
);
10911 goto decode_success
;
10914 /* 66 0F DD = PADDUSW */
10915 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xDD) {
10916 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10917 "paddusw", Iop_QAdd16Ux8
, False
);
10918 goto decode_success
;
10921 /* 66 0F DB = PAND */
10922 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xDB) {
10923 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "pand", Iop_AndV128
);
10924 goto decode_success
;
10927 /* 66 0F DF = PANDN */
10928 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xDF) {
10929 delta
= dis_SSE_E_to_G_all_invG( sorb
, delta
+2, "pandn", Iop_AndV128
);
10930 goto decode_success
;
10933 /* 66 0F E0 = PAVGB */
10934 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE0) {
10935 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10936 "pavgb", Iop_Avg8Ux16
, False
);
10937 goto decode_success
;
10940 /* 66 0F E3 = PAVGW */
10941 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE3) {
10942 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10943 "pavgw", Iop_Avg16Ux8
, False
);
10944 goto decode_success
;
10947 /* 66 0F 74 = PCMPEQB */
10948 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x74) {
10949 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10950 "pcmpeqb", Iop_CmpEQ8x16
, False
);
10951 goto decode_success
;
10954 /* 66 0F 76 = PCMPEQD */
10955 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x76) {
10956 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10957 "pcmpeqd", Iop_CmpEQ32x4
, False
);
10958 goto decode_success
;
10961 /* 66 0F 75 = PCMPEQW */
10962 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x75) {
10963 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10964 "pcmpeqw", Iop_CmpEQ16x8
, False
);
10965 goto decode_success
;
10968 /* 66 0F 64 = PCMPGTB */
10969 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x64) {
10970 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10971 "pcmpgtb", Iop_CmpGT8Sx16
, False
);
10972 goto decode_success
;
10975 /* 66 0F 66 = PCMPGTD */
10976 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x66) {
10977 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10978 "pcmpgtd", Iop_CmpGT32Sx4
, False
);
10979 goto decode_success
;
10982 /* 66 0F 65 = PCMPGTW */
10983 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x65) {
10984 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10985 "pcmpgtw", Iop_CmpGT16Sx8
, False
);
10986 goto decode_success
;
10989 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
10990 zero-extend of it in ireg(G). */
10991 if (insn
[0] == 0x0F && insn
[1] == 0xC5) {
10993 if (sz
== 2 && epartIsReg(modrm
)) {
10994 t5
= newTemp(Ity_V128
);
10995 t4
= newTemp(Ity_I16
);
10996 assign(t5
, getXMMReg(eregOfRM(modrm
)));
10997 breakup128to32s( t5
, &t3
, &t2
, &t1
, &t0
);
10998 switch (insn
[3] & 7) {
10999 case 0: assign(t4
, unop(Iop_32to16
, mkexpr(t0
))); break;
11000 case 1: assign(t4
, unop(Iop_32HIto16
, mkexpr(t0
))); break;
11001 case 2: assign(t4
, unop(Iop_32to16
, mkexpr(t1
))); break;
11002 case 3: assign(t4
, unop(Iop_32HIto16
, mkexpr(t1
))); break;
11003 case 4: assign(t4
, unop(Iop_32to16
, mkexpr(t2
))); break;
11004 case 5: assign(t4
, unop(Iop_32HIto16
, mkexpr(t2
))); break;
11005 case 6: assign(t4
, unop(Iop_32to16
, mkexpr(t3
))); break;
11006 case 7: assign(t4
, unop(Iop_32HIto16
, mkexpr(t3
))); break;
11007 default: vassert(0); /*NOTREACHED*/
11009 putIReg(4, gregOfRM(modrm
), unop(Iop_16Uto32
, mkexpr(t4
)));
11010 DIP("pextrw $%d,%s,%s\n",
11011 (Int
)insn
[3], nameXMMReg(eregOfRM(modrm
)),
11012 nameIReg(4,gregOfRM(modrm
)));
11014 goto decode_success
;
11016 /* else fall through */
11019 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
11020 put it into the specified lane of xmm(G). */
11021 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xC4) {
11023 t4
= newTemp(Ity_I16
);
11026 if (epartIsReg(modrm
)) {
11027 assign(t4
, getIReg(2, eregOfRM(modrm
)));
11029 lane
= insn
[3+1-1];
11030 DIP("pinsrw $%d,%s,%s\n", lane
,
11031 nameIReg(2,eregOfRM(modrm
)),
11032 nameXMMReg(gregOfRM(modrm
)));
11034 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11036 lane
= insn
[3+alen
-1];
11037 assign(t4
, loadLE(Ity_I16
, mkexpr(addr
)));
11038 DIP("pinsrw $%d,%s,%s\n", lane
,
11040 nameXMMReg(gregOfRM(modrm
)));
11043 putXMMRegLane16( gregOfRM(modrm
), lane
& 7, mkexpr(t4
) );
11044 goto decode_success
;
11047 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
11048 E(xmm or mem) to G(xmm) */
11049 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF5) {
11050 IRTemp s1V
= newTemp(Ity_V128
);
11051 IRTemp s2V
= newTemp(Ity_V128
);
11052 IRTemp dV
= newTemp(Ity_V128
);
11053 IRTemp s1Hi
= newTemp(Ity_I64
);
11054 IRTemp s1Lo
= newTemp(Ity_I64
);
11055 IRTemp s2Hi
= newTemp(Ity_I64
);
11056 IRTemp s2Lo
= newTemp(Ity_I64
);
11057 IRTemp dHi
= newTemp(Ity_I64
);
11058 IRTemp dLo
= newTemp(Ity_I64
);
11060 if (epartIsReg(modrm
)) {
11061 assign( s1V
, getXMMReg(eregOfRM(modrm
)) );
11063 DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
11064 nameXMMReg(gregOfRM(modrm
)));
11066 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11067 assign( s1V
, loadLE(Ity_V128
, mkexpr(addr
)) );
11069 DIP("pmaddwd %s,%s\n", dis_buf
,
11070 nameXMMReg(gregOfRM(modrm
)));
11072 assign( s2V
, getXMMReg(gregOfRM(modrm
)) );
11073 assign( s1Hi
, unop(Iop_V128HIto64
, mkexpr(s1V
)) );
11074 assign( s1Lo
, unop(Iop_V128to64
, mkexpr(s1V
)) );
11075 assign( s2Hi
, unop(Iop_V128HIto64
, mkexpr(s2V
)) );
11076 assign( s2Lo
, unop(Iop_V128to64
, mkexpr(s2V
)) );
11077 assign( dHi
, mkIRExprCCall(
11078 Ity_I64
, 0/*regparms*/,
11079 "x86g_calculate_mmx_pmaddwd",
11080 &x86g_calculate_mmx_pmaddwd
,
11081 mkIRExprVec_2( mkexpr(s1Hi
), mkexpr(s2Hi
))
11083 assign( dLo
, mkIRExprCCall(
11084 Ity_I64
, 0/*regparms*/,
11085 "x86g_calculate_mmx_pmaddwd",
11086 &x86g_calculate_mmx_pmaddwd
,
11087 mkIRExprVec_2( mkexpr(s1Lo
), mkexpr(s2Lo
))
11089 assign( dV
, binop(Iop_64HLtoV128
, mkexpr(dHi
), mkexpr(dLo
))) ;
11090 putXMMReg(gregOfRM(modrm
), mkexpr(dV
));
11091 goto decode_success
;
11094 /* 66 0F EE = PMAXSW -- 16x8 signed max */
11095 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xEE) {
11096 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11097 "pmaxsw", Iop_Max16Sx8
, False
);
11098 goto decode_success
;
11101 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
11102 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xDE) {
11103 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11104 "pmaxub", Iop_Max8Ux16
, False
);
11105 goto decode_success
;
11108 /* 66 0F EA = PMINSW -- 16x8 signed min */
11109 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xEA) {
11110 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11111 "pminsw", Iop_Min16Sx8
, False
);
11112 goto decode_success
;
11115 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
11116 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xDA) {
11117 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11118 "pminub", Iop_Min8Ux16
, False
);
11119 goto decode_success
;
11122 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes
11123 in xmm(E), turn them into a byte, and put zero-extend of it in
11125 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD7) {
11127 if (epartIsReg(modrm
)) {
11128 t0
= newTemp(Ity_I64
);
11129 t1
= newTemp(Ity_I64
);
11130 assign(t0
, getXMMRegLane64(eregOfRM(modrm
), 0));
11131 assign(t1
, getXMMRegLane64(eregOfRM(modrm
), 1));
11132 t5
= newTemp(Ity_I32
);
11136 unop(Iop_GetMSBs8x8
, mkexpr(t1
)),
11137 unop(Iop_GetMSBs8x8
, mkexpr(t0
)))));
11138 putIReg(4, gregOfRM(modrm
), mkexpr(t5
));
11139 DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
11140 nameIReg(4,gregOfRM(modrm
)));
11142 goto decode_success
;
11144 /* else fall through */
11147 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
11148 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE4) {
11149 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11150 "pmulhuw", Iop_MulHi16Ux8
, False
);
11151 goto decode_success
;
11154 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
11155 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE5) {
11156 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11157 "pmulhw", Iop_MulHi16Sx8
, False
);
11158 goto decode_success
;
11161 /* 66 0F D5 = PMULLW -- 16x8 multiply */
11162 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD5) {
11163 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11164 "pmullw", Iop_Mul16x8
, False
);
11165 goto decode_success
;
11168 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
11169 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
11170 0 to form 64-bit result */
11171 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xF4) {
11172 IRTemp sV
= newTemp(Ity_I64
);
11173 IRTemp dV
= newTemp(Ity_I64
);
11174 t1
= newTemp(Ity_I32
);
11175 t0
= newTemp(Ity_I32
);
11179 assign( dV
, getMMXReg(gregOfRM(modrm
)) );
11181 if (epartIsReg(modrm
)) {
11182 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
11184 DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
11185 nameMMXReg(gregOfRM(modrm
)));
11187 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11188 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
11190 DIP("pmuludq %s,%s\n", dis_buf
,
11191 nameMMXReg(gregOfRM(modrm
)));
11194 assign( t0
, unop(Iop_64to32
, mkexpr(dV
)) );
11195 assign( t1
, unop(Iop_64to32
, mkexpr(sV
)) );
11196 putMMXReg( gregOfRM(modrm
),
11197 binop( Iop_MullU32
, mkexpr(t0
), mkexpr(t1
) ) );
11198 goto decode_success
;
11201 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
11202 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
11204 /* This is a really poor translation -- could be improved if
11205 performance critical */
11206 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF4) {
11208 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
11209 sV
= newTemp(Ity_V128
);
11210 dV
= newTemp(Ity_V128
);
11211 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
11212 t1
= newTemp(Ity_I64
);
11213 t0
= newTemp(Ity_I64
);
11215 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
11217 if (epartIsReg(modrm
)) {
11218 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
11220 DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
11221 nameXMMReg(gregOfRM(modrm
)));
11223 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11224 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11226 DIP("pmuludq %s,%s\n", dis_buf
,
11227 nameXMMReg(gregOfRM(modrm
)));
11230 breakup128to32s( dV
, &d3
, &d2
, &d1
, &d0
);
11231 breakup128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
11233 assign( t0
, binop( Iop_MullU32
, mkexpr(d0
), mkexpr(s0
)) );
11234 putXMMRegLane64( gregOfRM(modrm
), 0, mkexpr(t0
) );
11235 assign( t1
, binop( Iop_MullU32
, mkexpr(d2
), mkexpr(s2
)) );
11236 putXMMRegLane64( gregOfRM(modrm
), 1, mkexpr(t1
) );
11237 goto decode_success
;
11240 /* 66 0F EB = POR */
11241 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xEB) {
11242 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "por", Iop_OrV128
);
11243 goto decode_success
;
11246 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
11247 from E(xmm or mem) to G(xmm) */
11248 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF6) {
11249 IRTemp s1V
= newTemp(Ity_V128
);
11250 IRTemp s2V
= newTemp(Ity_V128
);
11251 IRTemp dV
= newTemp(Ity_V128
);
11252 IRTemp s1Hi
= newTemp(Ity_I64
);
11253 IRTemp s1Lo
= newTemp(Ity_I64
);
11254 IRTemp s2Hi
= newTemp(Ity_I64
);
11255 IRTemp s2Lo
= newTemp(Ity_I64
);
11256 IRTemp dHi
= newTemp(Ity_I64
);
11257 IRTemp dLo
= newTemp(Ity_I64
);
11259 if (epartIsReg(modrm
)) {
11260 assign( s1V
, getXMMReg(eregOfRM(modrm
)) );
11262 DIP("psadbw %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
11263 nameXMMReg(gregOfRM(modrm
)));
11265 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11266 assign( s1V
, loadLE(Ity_V128
, mkexpr(addr
)) );
11268 DIP("psadbw %s,%s\n", dis_buf
,
11269 nameXMMReg(gregOfRM(modrm
)));
11271 assign( s2V
, getXMMReg(gregOfRM(modrm
)) );
11272 assign( s1Hi
, unop(Iop_V128HIto64
, mkexpr(s1V
)) );
11273 assign( s1Lo
, unop(Iop_V128to64
, mkexpr(s1V
)) );
11274 assign( s2Hi
, unop(Iop_V128HIto64
, mkexpr(s2V
)) );
11275 assign( s2Lo
, unop(Iop_V128to64
, mkexpr(s2V
)) );
11276 assign( dHi
, mkIRExprCCall(
11277 Ity_I64
, 0/*regparms*/,
11278 "x86g_calculate_mmx_psadbw",
11279 &x86g_calculate_mmx_psadbw
,
11280 mkIRExprVec_2( mkexpr(s1Hi
), mkexpr(s2Hi
))
11282 assign( dLo
, mkIRExprCCall(
11283 Ity_I64
, 0/*regparms*/,
11284 "x86g_calculate_mmx_psadbw",
11285 &x86g_calculate_mmx_psadbw
,
11286 mkIRExprVec_2( mkexpr(s1Lo
), mkexpr(s2Lo
))
11288 assign( dV
, binop(Iop_64HLtoV128
, mkexpr(dHi
), mkexpr(dLo
))) ;
11289 putXMMReg(gregOfRM(modrm
), mkexpr(dV
));
11290 goto decode_success
;
11293 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
11294 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x70) {
11296 IRTemp sV
, dV
, s3
, s2
, s1
, s0
;
11297 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11298 sV
= newTemp(Ity_V128
);
11299 dV
= newTemp(Ity_V128
);
11301 if (epartIsReg(modrm
)) {
11302 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
11303 order
= (Int
)insn
[3];
11305 DIP("pshufd $%d,%s,%s\n", order
,
11306 nameXMMReg(eregOfRM(modrm
)),
11307 nameXMMReg(gregOfRM(modrm
)));
11309 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11310 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11311 order
= (Int
)insn
[2+alen
];
11313 DIP("pshufd $%d,%s,%s\n", order
,
11315 nameXMMReg(gregOfRM(modrm
)));
11317 breakup128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
11320 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11322 mk128from32s( SEL((order
>>6)&3), SEL((order
>>4)&3),
11323 SEL((order
>>2)&3), SEL((order
>>0)&3) )
11325 putXMMReg(gregOfRM(modrm
), mkexpr(dV
));
11327 goto decode_success
;
11330 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
11331 mem) to G(xmm), and copy lower half */
11332 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x70) {
11334 IRTemp sVhi
, dVhi
, sV
, dV
, s3
, s2
, s1
, s0
;
11335 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11336 sV
= newTemp(Ity_V128
);
11337 dV
= newTemp(Ity_V128
);
11338 sVhi
= newTemp(Ity_I64
);
11339 dVhi
= newTemp(Ity_I64
);
11341 if (epartIsReg(modrm
)) {
11342 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
11343 order
= (Int
)insn
[4];
11345 DIP("pshufhw $%d,%s,%s\n", order
,
11346 nameXMMReg(eregOfRM(modrm
)),
11347 nameXMMReg(gregOfRM(modrm
)));
11349 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
11350 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11351 order
= (Int
)insn
[3+alen
];
11353 DIP("pshufhw $%d,%s,%s\n", order
,
11355 nameXMMReg(gregOfRM(modrm
)));
11357 assign( sVhi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
11358 breakup64to16s( sVhi
, &s3
, &s2
, &s1
, &s0
);
11361 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11363 mk64from16s( SEL((order
>>6)&3), SEL((order
>>4)&3),
11364 SEL((order
>>2)&3), SEL((order
>>0)&3) )
11366 assign(dV
, binop( Iop_64HLtoV128
,
11368 unop(Iop_V128to64
, mkexpr(sV
))) );
11369 putXMMReg(gregOfRM(modrm
), mkexpr(dV
));
11371 goto decode_success
;
11374 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
11375 mem) to G(xmm), and copy upper half */
11376 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x70) {
11378 IRTemp sVlo
, dVlo
, sV
, dV
, s3
, s2
, s1
, s0
;
11379 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11380 sV
= newTemp(Ity_V128
);
11381 dV
= newTemp(Ity_V128
);
11382 sVlo
= newTemp(Ity_I64
);
11383 dVlo
= newTemp(Ity_I64
);
11385 if (epartIsReg(modrm
)) {
11386 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
11387 order
= (Int
)insn
[4];
11389 DIP("pshuflw $%d,%s,%s\n", order
,
11390 nameXMMReg(eregOfRM(modrm
)),
11391 nameXMMReg(gregOfRM(modrm
)));
11393 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
11394 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11395 order
= (Int
)insn
[3+alen
];
11397 DIP("pshuflw $%d,%s,%s\n", order
,
11399 nameXMMReg(gregOfRM(modrm
)));
11401 assign( sVlo
, unop(Iop_V128to64
, mkexpr(sV
)) );
11402 breakup64to16s( sVlo
, &s3
, &s2
, &s1
, &s0
);
11405 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11407 mk64from16s( SEL((order
>>6)&3), SEL((order
>>4)&3),
11408 SEL((order
>>2)&3), SEL((order
>>0)&3) )
11410 assign(dV
, binop( Iop_64HLtoV128
,
11411 unop(Iop_V128HIto64
, mkexpr(sV
)),
11413 putXMMReg(gregOfRM(modrm
), mkexpr(dV
));
11415 goto decode_success
;
11418 /* 66 0F 72 /6 ib = PSLLD by immediate */
11419 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x72
11420 && epartIsReg(insn
[2])
11421 && gregOfRM(insn
[2]) == 6) {
11422 delta
= dis_SSE_shiftE_imm( delta
+2, "pslld", Iop_ShlN32x4
);
11423 goto decode_success
;
11426 /* 66 0F F2 = PSLLD by E */
11427 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF2) {
11428 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "pslld", Iop_ShlN32x4
);
11429 goto decode_success
;
11432 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
11433 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x73
11434 && epartIsReg(insn
[2])
11435 && gregOfRM(insn
[2]) == 7) {
11436 IRTemp sV
, dV
, hi64
, lo64
, hi64r
, lo64r
;
11437 Int imm
= (Int
)insn
[3];
11438 Int reg
= eregOfRM(insn
[2]);
11439 DIP("pslldq $%d,%s\n", imm
, nameXMMReg(reg
));
11440 vassert(imm
>= 0 && imm
<= 255);
11443 sV
= newTemp(Ity_V128
);
11444 dV
= newTemp(Ity_V128
);
11445 hi64
= newTemp(Ity_I64
);
11446 lo64
= newTemp(Ity_I64
);
11447 hi64r
= newTemp(Ity_I64
);
11448 lo64r
= newTemp(Ity_I64
);
11451 putXMMReg(reg
, mkV128(0x0000));
11452 goto decode_success
;
11455 assign( sV
, getXMMReg(reg
) );
11456 assign( hi64
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
11457 assign( lo64
, unop(Iop_V128to64
, mkexpr(sV
)) );
11460 assign( lo64r
, mkexpr(lo64
) );
11461 assign( hi64r
, mkexpr(hi64
) );
11465 assign( lo64r
, mkU64(0) );
11466 assign( hi64r
, mkexpr(lo64
) );
11470 assign( lo64r
, mkU64(0) );
11471 assign( hi64r
, binop( Iop_Shl64
,
11473 mkU8( 8*(imm
-8) ) ));
11475 assign( lo64r
, binop( Iop_Shl64
,
11480 binop(Iop_Shl64
, mkexpr(hi64
),
11482 binop(Iop_Shr64
, mkexpr(lo64
),
11483 mkU8(8 * (8 - imm
)) )
11487 assign( dV
, binop(Iop_64HLtoV128
, mkexpr(hi64r
), mkexpr(lo64r
)) );
11488 putXMMReg(reg
, mkexpr(dV
));
11489 goto decode_success
;
11492 /* 66 0F 73 /6 ib = PSLLQ by immediate */
11493 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x73
11494 && epartIsReg(insn
[2])
11495 && gregOfRM(insn
[2]) == 6) {
11496 delta
= dis_SSE_shiftE_imm( delta
+2, "psllq", Iop_ShlN64x2
);
11497 goto decode_success
;
11500 /* 66 0F F3 = PSLLQ by E */
11501 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF3) {
11502 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "psllq", Iop_ShlN64x2
);
11503 goto decode_success
;
11506 /* 66 0F 71 /6 ib = PSLLW by immediate */
11507 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x71
11508 && epartIsReg(insn
[2])
11509 && gregOfRM(insn
[2]) == 6) {
11510 delta
= dis_SSE_shiftE_imm( delta
+2, "psllw", Iop_ShlN16x8
);
11511 goto decode_success
;
11514 /* 66 0F F1 = PSLLW by E */
11515 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF1) {
11516 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "psllw", Iop_ShlN16x8
);
11517 goto decode_success
;
11520 /* 66 0F 72 /4 ib = PSRAD by immediate */
11521 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x72
11522 && epartIsReg(insn
[2])
11523 && gregOfRM(insn
[2]) == 4) {
11524 delta
= dis_SSE_shiftE_imm( delta
+2, "psrad", Iop_SarN32x4
);
11525 goto decode_success
;
11528 /* 66 0F E2 = PSRAD by E */
11529 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE2) {
11530 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "psrad", Iop_SarN32x4
);
11531 goto decode_success
;
11534 /* 66 0F 71 /4 ib = PSRAW by immediate */
11535 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x71
11536 && epartIsReg(insn
[2])
11537 && gregOfRM(insn
[2]) == 4) {
11538 delta
= dis_SSE_shiftE_imm( delta
+2, "psraw", Iop_SarN16x8
);
11539 goto decode_success
;
11542 /* 66 0F E1 = PSRAW by E */
11543 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE1) {
11544 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "psraw", Iop_SarN16x8
);
11545 goto decode_success
;
11548 /* 66 0F 72 /2 ib = PSRLD by immediate */
11549 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x72
11550 && epartIsReg(insn
[2])
11551 && gregOfRM(insn
[2]) == 2) {
11552 delta
= dis_SSE_shiftE_imm( delta
+2, "psrld", Iop_ShrN32x4
);
11553 goto decode_success
;
11556 /* 66 0F D2 = PSRLD by E */
11557 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD2) {
11558 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "psrld", Iop_ShrN32x4
);
11559 goto decode_success
;
11562 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
11563 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x73
11564 && epartIsReg(insn
[2])
11565 && gregOfRM(insn
[2]) == 3) {
11566 IRTemp sV
, dV
, hi64
, lo64
, hi64r
, lo64r
;
11567 Int imm
= (Int
)insn
[3];
11568 Int reg
= eregOfRM(insn
[2]);
11569 DIP("psrldq $%d,%s\n", imm
, nameXMMReg(reg
));
11570 vassert(imm
>= 0 && imm
<= 255);
11573 sV
= newTemp(Ity_V128
);
11574 dV
= newTemp(Ity_V128
);
11575 hi64
= newTemp(Ity_I64
);
11576 lo64
= newTemp(Ity_I64
);
11577 hi64r
= newTemp(Ity_I64
);
11578 lo64r
= newTemp(Ity_I64
);
11581 putXMMReg(reg
, mkV128(0x0000));
11582 goto decode_success
;
11585 assign( sV
, getXMMReg(reg
) );
11586 assign( hi64
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
11587 assign( lo64
, unop(Iop_V128to64
, mkexpr(sV
)) );
11590 assign( lo64r
, mkexpr(lo64
) );
11591 assign( hi64r
, mkexpr(hi64
) );
11595 assign( hi64r
, mkU64(0) );
11596 assign( lo64r
, mkexpr(hi64
) );
11600 assign( hi64r
, mkU64(0) );
11601 assign( lo64r
, binop( Iop_Shr64
,
11603 mkU8( 8*(imm
-8) ) ));
11605 assign( hi64r
, binop( Iop_Shr64
,
11610 binop(Iop_Shr64
, mkexpr(lo64
),
11612 binop(Iop_Shl64
, mkexpr(hi64
),
11613 mkU8(8 * (8 - imm
)) )
11618 assign( dV
, binop(Iop_64HLtoV128
, mkexpr(hi64r
), mkexpr(lo64r
)) );
11619 putXMMReg(reg
, mkexpr(dV
));
11620 goto decode_success
;
11623 /* 66 0F 73 /2 ib = PSRLQ by immediate */
11624 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x73
11625 && epartIsReg(insn
[2])
11626 && gregOfRM(insn
[2]) == 2) {
11627 delta
= dis_SSE_shiftE_imm( delta
+2, "psrlq", Iop_ShrN64x2
);
11628 goto decode_success
;
11631 /* 66 0F D3 = PSRLQ by E */
11632 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD3) {
11633 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "psrlq", Iop_ShrN64x2
);
11634 goto decode_success
;
11637 /* 66 0F 71 /2 ib = PSRLW by immediate */
11638 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x71
11639 && epartIsReg(insn
[2])
11640 && gregOfRM(insn
[2]) == 2) {
11641 delta
= dis_SSE_shiftE_imm( delta
+2, "psrlw", Iop_ShrN16x8
);
11642 goto decode_success
;
11645 /* 66 0F D1 = PSRLW by E */
11646 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD1) {
11647 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "psrlw", Iop_ShrN16x8
);
11648 goto decode_success
;
11651 /* 66 0F F8 = PSUBB */
11652 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF8) {
11653 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11654 "psubb", Iop_Sub8x16
, False
);
11655 goto decode_success
;
11658 /* 66 0F FA = PSUBD */
11659 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xFA) {
11660 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11661 "psubd", Iop_Sub32x4
, False
);
11662 goto decode_success
;
11665 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
11666 /* 0F FB = PSUBQ -- sub 64x1 */
11667 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xFB) {
11669 delta
= dis_MMXop_regmem_to_reg (
11670 sorb
, delta
+2, insn
[1], "psubq", False
);
11671 goto decode_success
;
11674 /* 66 0F FB = PSUBQ */
11675 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xFB) {
11676 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11677 "psubq", Iop_Sub64x2
, False
);
11678 goto decode_success
;
11681 /* 66 0F F9 = PSUBW */
11682 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF9) {
11683 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11684 "psubw", Iop_Sub16x8
, False
);
11685 goto decode_success
;
11688 /* 66 0F E8 = PSUBSB */
11689 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE8) {
11690 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11691 "psubsb", Iop_QSub8Sx16
, False
);
11692 goto decode_success
;
11695 /* 66 0F E9 = PSUBSW */
11696 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE9) {
11697 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11698 "psubsw", Iop_QSub16Sx8
, False
);
11699 goto decode_success
;
11702 /* 66 0F D8 = PSUBUSB */
11703 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD8) {
11704 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11705 "psubusb", Iop_QSub8Ux16
, False
);
11706 goto decode_success
;
11709 /* 66 0F D9 = PSUBUSW */
11710 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD9) {
11711 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11712 "psubusw", Iop_QSub16Ux8
, False
);
11713 goto decode_success
;
11716 /* 66 0F 68 = PUNPCKHBW */
11717 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x68) {
11718 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11720 Iop_InterleaveHI8x16
, True
);
11721 goto decode_success
;
11724 /* 66 0F 6A = PUNPCKHDQ */
11725 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x6A) {
11726 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11728 Iop_InterleaveHI32x4
, True
);
11729 goto decode_success
;
11732 /* 66 0F 6D = PUNPCKHQDQ */
11733 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x6D) {
11734 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11736 Iop_InterleaveHI64x2
, True
);
11737 goto decode_success
;
11740 /* 66 0F 69 = PUNPCKHWD */
11741 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x69) {
11742 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11744 Iop_InterleaveHI16x8
, True
);
11745 goto decode_success
;
11748 /* 66 0F 60 = PUNPCKLBW */
11749 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x60) {
11750 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11752 Iop_InterleaveLO8x16
, True
);
11753 goto decode_success
;
11756 /* 66 0F 62 = PUNPCKLDQ */
11757 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x62) {
11758 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11760 Iop_InterleaveLO32x4
, True
);
11761 goto decode_success
;
11764 /* 66 0F 6C = PUNPCKLQDQ */
11765 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x6C) {
11766 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11768 Iop_InterleaveLO64x2
, True
);
11769 goto decode_success
;
11772 /* 66 0F 61 = PUNPCKLWD */
11773 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x61) {
11774 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11776 Iop_InterleaveLO16x8
, True
);
11777 goto decode_success
;
11780 /* 66 0F EF = PXOR */
11781 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xEF) {
11782 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "pxor", Iop_XorV128
);
11783 goto decode_success
;
11786 //-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
11787 //-- if (insn[0] == 0x0F && insn[1] == 0xAE
11788 //-- && (!epartIsReg(insn[2]))
11789 //-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
11790 //-- Bool store = gregOfRM(insn[2]) == 0;
11791 //-- vg_assert(sz == 4);
11792 //-- pair = disAMode ( cb, sorb, eip+2, dis_buf );
11793 //-- t1 = LOW24(pair);
11794 //-- eip += 2+HI8(pair);
11795 //-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
11796 //-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
11797 //-- Lit16, (UShort)insn[2],
11798 //-- TempReg, t1 );
11799 //-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf );
11800 //-- goto decode_success;
11803 /* 0F AE /7 = CLFLUSH -- flush cache line */
11804 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xAE
11805 && !epartIsReg(insn
[2]) && gregOfRM(insn
[2]) == 7) {
11807 /* This is something of a hack. We need to know the size of the
11808 cache line containing addr. Since we don't (easily), assume
11809 256 on the basis that no real cache would have a line that
11810 big. It's safe to invalidate more stuff than we need, just
11812 UInt lineszB
= 256;
11814 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11817 /* Round addr down to the start of the containing block. */
11822 mkU32( ~(lineszB
-1) ))) );
11824 stmt( IRStmt_Put(OFFB_CMLEN
, mkU32(lineszB
) ) );
11826 jmp_lit(&dres
, Ijk_InvalICache
, (Addr32
)(guest_EIP_bbstart
+delta
));
11828 DIP("clflush %s\n", dis_buf
);
11829 goto decode_success
;
11832 /* ---------------------------------------------------- */
11833 /* --- end of the SSE2 decoder. --- */
11834 /* ---------------------------------------------------- */
11836 /* ---------------------------------------------------- */
11837 /* --- start of the SSE3 decoder. --- */
11838 /* ---------------------------------------------------- */
11840 /* Skip parts of the decoder which don't apply given the stated
11841 guest subarchitecture. */
11842 if (0 == (archinfo
->hwcaps
& VEX_HWCAPS_X86_SSE3
))
11843 goto after_sse_decoders
; /* no SSE3 capabilities */
11845 insn
= &guest_code
[delta
];
11847 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
11848 duplicating some lanes (2:2:0:0). */
11849 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
11850 duplicating some lanes (3:3:1:1). */
11851 if (sz
== 4 && insn
[0] == 0xF3 && insn
[1] == 0x0F
11852 && (insn
[2] == 0x12 || insn
[2] == 0x16)) {
11853 IRTemp s3
, s2
, s1
, s0
;
11854 IRTemp sV
= newTemp(Ity_V128
);
11855 Bool isH
= insn
[2] == 0x16;
11856 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11859 if (epartIsReg(modrm
)) {
11860 assign( sV
, getXMMReg( eregOfRM(modrm
)) );
11861 DIP("movs%cdup %s,%s\n", isH
? 'h' : 'l',
11862 nameXMMReg(eregOfRM(modrm
)),
11863 nameXMMReg(gregOfRM(modrm
)));
11866 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
11867 gen_SEGV_if_not_16_aligned( addr
);
11868 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11869 DIP("movs%cdup %s,%s\n", isH
? 'h' : 'l',
11871 nameXMMReg(gregOfRM(modrm
)));
11875 breakup128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
11876 putXMMReg( gregOfRM(modrm
),
11877 isH
? mk128from32s( s3
, s3
, s1
, s1
)
11878 : mk128from32s( s2
, s2
, s0
, s0
) );
11879 goto decode_success
;
11882 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
11883 duplicating some lanes (0:1:0:1). */
11884 if (sz
== 4 && insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x12) {
11885 IRTemp sV
= newTemp(Ity_V128
);
11886 IRTemp d0
= newTemp(Ity_I64
);
11889 if (epartIsReg(modrm
)) {
11890 assign( sV
, getXMMReg( eregOfRM(modrm
)) );
11891 DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
11892 nameXMMReg(gregOfRM(modrm
)));
11894 assign ( d0
, unop(Iop_V128to64
, mkexpr(sV
)) );
11896 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
11897 assign( d0
, loadLE(Ity_I64
, mkexpr(addr
)) );
11898 DIP("movddup %s,%s\n", dis_buf
,
11899 nameXMMReg(gregOfRM(modrm
)));
11903 putXMMReg( gregOfRM(modrm
), binop(Iop_64HLtoV128
,mkexpr(d0
),mkexpr(d0
)) );
11904 goto decode_success
;
11907 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
11908 if (sz
== 4 && insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0xD0) {
11909 IRTemp a3
, a2
, a1
, a0
, s3
, s2
, s1
, s0
;
11910 IRTemp eV
= newTemp(Ity_V128
);
11911 IRTemp gV
= newTemp(Ity_V128
);
11912 IRTemp addV
= newTemp(Ity_V128
);
11913 IRTemp subV
= newTemp(Ity_V128
);
11914 IRTemp rm
= newTemp(Ity_I32
);
11915 a3
= a2
= a1
= a0
= s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11918 if (epartIsReg(modrm
)) {
11919 assign( eV
, getXMMReg( eregOfRM(modrm
)) );
11920 DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
11921 nameXMMReg(gregOfRM(modrm
)));
11924 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
11925 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11926 DIP("addsubps %s,%s\n", dis_buf
,
11927 nameXMMReg(gregOfRM(modrm
)));
11931 assign( gV
, getXMMReg(gregOfRM(modrm
)) );
11933 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11934 assign( addV
, triop(Iop_Add32Fx4
, mkexpr(rm
), mkexpr(gV
), mkexpr(eV
)) );
11935 assign( subV
, triop(Iop_Sub32Fx4
, mkexpr(rm
), mkexpr(gV
), mkexpr(eV
)) );
11937 breakup128to32s( addV
, &a3
, &a2
, &a1
, &a0
);
11938 breakup128to32s( subV
, &s3
, &s2
, &s1
, &s0
);
11940 putXMMReg( gregOfRM(modrm
), mk128from32s( a3
, s2
, a1
, s0
));
11941 goto decode_success
;
11944 /* 66 0F D0 = ADDSUBPD -- 64x4 +/- from E (mem or xmm) to G (xmm). */
11945 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD0) {
11946 IRTemp eV
= newTemp(Ity_V128
);
11947 IRTemp gV
= newTemp(Ity_V128
);
11948 IRTemp addV
= newTemp(Ity_V128
);
11949 IRTemp subV
= newTemp(Ity_V128
);
11950 IRTemp a1
= newTemp(Ity_I64
);
11951 IRTemp s0
= newTemp(Ity_I64
);
11952 IRTemp rm
= newTemp(Ity_I32
);
11955 if (epartIsReg(modrm
)) {
11956 assign( eV
, getXMMReg( eregOfRM(modrm
)) );
11957 DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
11958 nameXMMReg(gregOfRM(modrm
)));
11961 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11962 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11963 DIP("addsubpd %s,%s\n", dis_buf
,
11964 nameXMMReg(gregOfRM(modrm
)));
11968 assign( gV
, getXMMReg(gregOfRM(modrm
)) );
11970 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11971 assign( addV
, triop(Iop_Add64Fx2
, mkexpr(rm
), mkexpr(gV
), mkexpr(eV
)) );
11972 assign( subV
, triop(Iop_Sub64Fx2
, mkexpr(rm
), mkexpr(gV
), mkexpr(eV
)) );
11974 assign( a1
, unop(Iop_V128HIto64
, mkexpr(addV
) ));
11975 assign( s0
, unop(Iop_V128to64
, mkexpr(subV
) ));
11977 putXMMReg( gregOfRM(modrm
),
11978 binop(Iop_64HLtoV128
, mkexpr(a1
), mkexpr(s0
)) );
11979 goto decode_success
;
11982 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
11983 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
11984 if (sz
== 4 && insn
[0] == 0xF2 && insn
[1] == 0x0F
11985 && (insn
[2] == 0x7C || insn
[2] == 0x7D)) {
11986 IRTemp e3
, e2
, e1
, e0
, g3
, g2
, g1
, g0
;
11987 IRTemp eV
= newTemp(Ity_V128
);
11988 IRTemp gV
= newTemp(Ity_V128
);
11989 IRTemp leftV
= newTemp(Ity_V128
);
11990 IRTemp rightV
= newTemp(Ity_V128
);
11991 IRTemp rm
= newTemp(Ity_I32
);
11992 Bool isAdd
= insn
[2] == 0x7C;
11993 const HChar
* str
= isAdd
? "add" : "sub";
11994 e3
= e2
= e1
= e0
= g3
= g2
= g1
= g0
= IRTemp_INVALID
;
11997 if (epartIsReg(modrm
)) {
11998 assign( eV
, getXMMReg( eregOfRM(modrm
)) );
11999 DIP("h%sps %s,%s\n", str
, nameXMMReg(eregOfRM(modrm
)),
12000 nameXMMReg(gregOfRM(modrm
)));
12003 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12004 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12005 DIP("h%sps %s,%s\n", str
, dis_buf
,
12006 nameXMMReg(gregOfRM(modrm
)));
12010 assign( gV
, getXMMReg(gregOfRM(modrm
)) );
12012 breakup128to32s( eV
, &e3
, &e2
, &e1
, &e0
);
12013 breakup128to32s( gV
, &g3
, &g2
, &g1
, &g0
);
12015 assign( leftV
, mk128from32s( e2
, e0
, g2
, g0
) );
12016 assign( rightV
, mk128from32s( e3
, e1
, g3
, g1
) );
12018 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
12019 putXMMReg( gregOfRM(modrm
),
12020 triop(isAdd
? Iop_Add32Fx4
: Iop_Sub32Fx4
,
12021 mkexpr(rm
), mkexpr(leftV
), mkexpr(rightV
) ) );
12022 goto decode_success
;
12025 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
12026 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
12027 if (sz
== 2 && insn
[0] == 0x0F && (insn
[1] == 0x7C || insn
[1] == 0x7D)) {
12028 IRTemp e1
= newTemp(Ity_I64
);
12029 IRTemp e0
= newTemp(Ity_I64
);
12030 IRTemp g1
= newTemp(Ity_I64
);
12031 IRTemp g0
= newTemp(Ity_I64
);
12032 IRTemp eV
= newTemp(Ity_V128
);
12033 IRTemp gV
= newTemp(Ity_V128
);
12034 IRTemp leftV
= newTemp(Ity_V128
);
12035 IRTemp rightV
= newTemp(Ity_V128
);
12036 IRTemp rm
= newTemp(Ity_I32
);
12037 Bool isAdd
= insn
[1] == 0x7C;
12038 const HChar
* str
= isAdd
? "add" : "sub";
12041 if (epartIsReg(modrm
)) {
12042 assign( eV
, getXMMReg( eregOfRM(modrm
)) );
12043 DIP("h%spd %s,%s\n", str
, nameXMMReg(eregOfRM(modrm
)),
12044 nameXMMReg(gregOfRM(modrm
)));
12047 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
12048 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12049 DIP("h%spd %s,%s\n", str
, dis_buf
,
12050 nameXMMReg(gregOfRM(modrm
)));
12054 assign( gV
, getXMMReg(gregOfRM(modrm
)) );
12056 assign( e1
, unop(Iop_V128HIto64
, mkexpr(eV
) ));
12057 assign( e0
, unop(Iop_V128to64
, mkexpr(eV
) ));
12058 assign( g1
, unop(Iop_V128HIto64
, mkexpr(gV
) ));
12059 assign( g0
, unop(Iop_V128to64
, mkexpr(gV
) ));
12061 assign( leftV
, binop(Iop_64HLtoV128
, mkexpr(e0
),mkexpr(g0
)) );
12062 assign( rightV
, binop(Iop_64HLtoV128
, mkexpr(e1
),mkexpr(g1
)) );
12064 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
12065 putXMMReg( gregOfRM(modrm
),
12066 triop(isAdd
? Iop_Add64Fx2
: Iop_Sub64Fx2
,
12067 mkexpr(rm
), mkexpr(leftV
), mkexpr(rightV
) ) );
12068 goto decode_success
;
12071 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
12072 if (sz
== 4 && insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0xF0) {
12073 modrm
= getIByte(delta
+3);
12074 if (epartIsReg(modrm
)) {
12075 goto decode_failure
;
12077 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12078 putXMMReg( gregOfRM(modrm
),
12079 loadLE(Ity_V128
, mkexpr(addr
)) );
12080 DIP("lddqu %s,%s\n", dis_buf
,
12081 nameXMMReg(gregOfRM(modrm
)));
12084 goto decode_success
;
12087 /* ---------------------------------------------------- */
12088 /* --- end of the SSE3 decoder. --- */
12089 /* ---------------------------------------------------- */
12091 /* ---------------------------------------------------- */
12092 /* --- start of the SSSE3 decoder. --- */
12093 /* ---------------------------------------------------- */
12095 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
12096 Unsigned Bytes (MMX) */
12098 && insn
[0] == 0x0F && insn
[1] == 0x38 && insn
[2] == 0x04) {
12099 IRTemp sV
= newTemp(Ity_I64
);
12100 IRTemp dV
= newTemp(Ity_I64
);
12101 IRTemp sVoddsSX
= newTemp(Ity_I64
);
12102 IRTemp sVevensSX
= newTemp(Ity_I64
);
12103 IRTemp dVoddsZX
= newTemp(Ity_I64
);
12104 IRTemp dVevensZX
= newTemp(Ity_I64
);
12108 assign( dV
, getMMXReg(gregOfRM(modrm
)) );
12110 if (epartIsReg(modrm
)) {
12111 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
12113 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
12114 nameMMXReg(gregOfRM(modrm
)));
12116 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12117 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
12119 DIP("pmaddubsw %s,%s\n", dis_buf
,
12120 nameMMXReg(gregOfRM(modrm
)));
12123 /* compute dV unsigned x sV signed */
12125 binop(Iop_SarN16x4
, mkexpr(sV
), mkU8(8)) );
12127 binop(Iop_SarN16x4
,
12128 binop(Iop_ShlN16x4
, mkexpr(sV
), mkU8(8)),
12131 binop(Iop_ShrN16x4
, mkexpr(dV
), mkU8(8)) );
12133 binop(Iop_ShrN16x4
,
12134 binop(Iop_ShlN16x4
, mkexpr(dV
), mkU8(8)),
12139 binop(Iop_QAdd16Sx4
,
12140 binop(Iop_Mul16x4
, mkexpr(sVoddsSX
), mkexpr(dVoddsZX
)),
12141 binop(Iop_Mul16x4
, mkexpr(sVevensSX
), mkexpr(dVevensZX
))
12144 goto decode_success
;
12147 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
12148 Unsigned Bytes (XMM) */
12150 && insn
[0] == 0x0F && insn
[1] == 0x38 && insn
[2] == 0x04) {
12151 IRTemp sV
= newTemp(Ity_V128
);
12152 IRTemp dV
= newTemp(Ity_V128
);
12153 IRTemp sVoddsSX
= newTemp(Ity_V128
);
12154 IRTemp sVevensSX
= newTemp(Ity_V128
);
12155 IRTemp dVoddsZX
= newTemp(Ity_V128
);
12156 IRTemp dVevensZX
= newTemp(Ity_V128
);
12159 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
12161 if (epartIsReg(modrm
)) {
12162 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
12164 DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
12165 nameXMMReg(gregOfRM(modrm
)));
12167 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12168 gen_SEGV_if_not_16_aligned( addr
);
12169 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12171 DIP("pmaddubsw %s,%s\n", dis_buf
,
12172 nameXMMReg(gregOfRM(modrm
)));
12175 /* compute dV unsigned x sV signed */
12177 binop(Iop_SarN16x8
, mkexpr(sV
), mkU8(8)) );
12179 binop(Iop_SarN16x8
,
12180 binop(Iop_ShlN16x8
, mkexpr(sV
), mkU8(8)),
12183 binop(Iop_ShrN16x8
, mkexpr(dV
), mkU8(8)) );
12185 binop(Iop_ShrN16x8
,
12186 binop(Iop_ShlN16x8
, mkexpr(dV
), mkU8(8)),
12191 binop(Iop_QAdd16Sx8
,
12192 binop(Iop_Mul16x8
, mkexpr(sVoddsSX
), mkexpr(dVoddsZX
)),
12193 binop(Iop_Mul16x8
, mkexpr(sVevensSX
), mkexpr(dVevensZX
))
12196 goto decode_success
;
12199 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
12200 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
12201 mmx) and G to G (mmx). */
12202 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
12203 mmx) and G to G (mmx). */
12204 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
12206 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
12208 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
12210 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
12214 && insn
[0] == 0x0F && insn
[1] == 0x38
12215 && (insn
[2] == 0x03 || insn
[2] == 0x07 || insn
[2] == 0x01
12216 || insn
[2] == 0x05 || insn
[2] == 0x02 || insn
[2] == 0x06)) {
12217 const HChar
* str
= "???";
12218 IROp opV64
= Iop_INVALID
;
12219 IROp opCatO
= Iop_CatOddLanes16x4
;
12220 IROp opCatE
= Iop_CatEvenLanes16x4
;
12221 IRTemp sV
= newTemp(Ity_I64
);
12222 IRTemp dV
= newTemp(Ity_I64
);
12227 case 0x03: opV64
= Iop_QAdd16Sx4
; str
= "addsw"; break;
12228 case 0x07: opV64
= Iop_QSub16Sx4
; str
= "subsw"; break;
12229 case 0x01: opV64
= Iop_Add16x4
; str
= "addw"; break;
12230 case 0x05: opV64
= Iop_Sub16x4
; str
= "subw"; break;
12231 case 0x02: opV64
= Iop_Add32x2
; str
= "addd"; break;
12232 case 0x06: opV64
= Iop_Sub32x2
; str
= "subd"; break;
12233 default: vassert(0);
12235 if (insn
[2] == 0x02 || insn
[2] == 0x06) {
12236 opCatO
= Iop_InterleaveHI32x2
;
12237 opCatE
= Iop_InterleaveLO32x2
;
12241 assign( dV
, getMMXReg(gregOfRM(modrm
)) );
12243 if (epartIsReg(modrm
)) {
12244 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
12246 DIP("ph%s %s,%s\n", str
, nameMMXReg(eregOfRM(modrm
)),
12247 nameMMXReg(gregOfRM(modrm
)));
12249 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12250 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
12252 DIP("ph%s %s,%s\n", str
, dis_buf
,
12253 nameMMXReg(gregOfRM(modrm
)));
12259 binop(opCatE
,mkexpr(sV
),mkexpr(dV
)),
12260 binop(opCatO
,mkexpr(sV
),mkexpr(dV
))
12263 goto decode_success
;
12266 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
12267 xmm) and G to G (xmm). */
12268 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
12269 xmm) and G to G (xmm). */
12270 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
12272 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
12274 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
12276 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
12280 && insn
[0] == 0x0F && insn
[1] == 0x38
12281 && (insn
[2] == 0x03 || insn
[2] == 0x07 || insn
[2] == 0x01
12282 || insn
[2] == 0x05 || insn
[2] == 0x02 || insn
[2] == 0x06)) {
12283 const HChar
* str
= "???";
12284 IROp opV64
= Iop_INVALID
;
12285 IROp opCatO
= Iop_CatOddLanes16x4
;
12286 IROp opCatE
= Iop_CatEvenLanes16x4
;
12287 IRTemp sV
= newTemp(Ity_V128
);
12288 IRTemp dV
= newTemp(Ity_V128
);
12289 IRTemp sHi
= newTemp(Ity_I64
);
12290 IRTemp sLo
= newTemp(Ity_I64
);
12291 IRTemp dHi
= newTemp(Ity_I64
);
12292 IRTemp dLo
= newTemp(Ity_I64
);
12297 case 0x03: opV64
= Iop_QAdd16Sx4
; str
= "addsw"; break;
12298 case 0x07: opV64
= Iop_QSub16Sx4
; str
= "subsw"; break;
12299 case 0x01: opV64
= Iop_Add16x4
; str
= "addw"; break;
12300 case 0x05: opV64
= Iop_Sub16x4
; str
= "subw"; break;
12301 case 0x02: opV64
= Iop_Add32x2
; str
= "addd"; break;
12302 case 0x06: opV64
= Iop_Sub32x2
; str
= "subd"; break;
12303 default: vassert(0);
12305 if (insn
[2] == 0x02 || insn
[2] == 0x06) {
12306 opCatO
= Iop_InterleaveHI32x2
;
12307 opCatE
= Iop_InterleaveLO32x2
;
12310 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
12312 if (epartIsReg(modrm
)) {
12313 assign( sV
, getXMMReg( eregOfRM(modrm
)) );
12314 DIP("ph%s %s,%s\n", str
, nameXMMReg(eregOfRM(modrm
)),
12315 nameXMMReg(gregOfRM(modrm
)));
12318 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12319 gen_SEGV_if_not_16_aligned( addr
);
12320 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12321 DIP("ph%s %s,%s\n", str
, dis_buf
,
12322 nameXMMReg(gregOfRM(modrm
)));
12326 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
12327 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
12328 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
12329 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
12331 /* This isn't a particularly efficient way to compute the
12332 result, but at least it avoids a proliferation of IROps,
12333 hence avoids complication all the backends. */
12336 binop(Iop_64HLtoV128
,
12338 binop(opCatE
,mkexpr(sHi
),mkexpr(sLo
)),
12339 binop(opCatO
,mkexpr(sHi
),mkexpr(sLo
))
12342 binop(opCatE
,mkexpr(dHi
),mkexpr(dLo
)),
12343 binop(opCatO
,mkexpr(dHi
),mkexpr(dLo
))
12347 goto decode_success
;
12350 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
12353 && insn
[0] == 0x0F && insn
[1] == 0x38 && insn
[2] == 0x0B) {
12354 IRTemp sV
= newTemp(Ity_I64
);
12355 IRTemp dV
= newTemp(Ity_I64
);
12359 assign( dV
, getMMXReg(gregOfRM(modrm
)) );
12361 if (epartIsReg(modrm
)) {
12362 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
12364 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
12365 nameMMXReg(gregOfRM(modrm
)));
12367 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12368 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
12370 DIP("pmulhrsw %s,%s\n", dis_buf
,
12371 nameMMXReg(gregOfRM(modrm
)));
12376 dis_PMULHRSW_helper( mkexpr(sV
), mkexpr(dV
) )
12378 goto decode_success
;
12381 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
12384 && insn
[0] == 0x0F && insn
[1] == 0x38 && insn
[2] == 0x0B) {
12385 IRTemp sV
= newTemp(Ity_V128
);
12386 IRTemp dV
= newTemp(Ity_V128
);
12387 IRTemp sHi
= newTemp(Ity_I64
);
12388 IRTemp sLo
= newTemp(Ity_I64
);
12389 IRTemp dHi
= newTemp(Ity_I64
);
12390 IRTemp dLo
= newTemp(Ity_I64
);
12393 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
12395 if (epartIsReg(modrm
)) {
12396 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
12398 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
12399 nameXMMReg(gregOfRM(modrm
)));
12401 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12402 gen_SEGV_if_not_16_aligned( addr
);
12403 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12405 DIP("pmulhrsw %s,%s\n", dis_buf
,
12406 nameXMMReg(gregOfRM(modrm
)));
12409 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
12410 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
12411 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
12412 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
12416 binop(Iop_64HLtoV128
,
12417 dis_PMULHRSW_helper( mkexpr(sHi
), mkexpr(dHi
) ),
12418 dis_PMULHRSW_helper( mkexpr(sLo
), mkexpr(dLo
) )
12421 goto decode_success
;
12424 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
12425 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
12426 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
12428 && insn
[0] == 0x0F && insn
[1] == 0x38
12429 && (insn
[2] == 0x08 || insn
[2] == 0x09 || insn
[2] == 0x0A)) {
12430 IRTemp sV
= newTemp(Ity_I64
);
12431 IRTemp dV
= newTemp(Ity_I64
);
12432 const HChar
* str
= "???";
12436 case 0x08: laneszB
= 1; str
= "b"; break;
12437 case 0x09: laneszB
= 2; str
= "w"; break;
12438 case 0x0A: laneszB
= 4; str
= "d"; break;
12439 default: vassert(0);
12444 assign( dV
, getMMXReg(gregOfRM(modrm
)) );
12446 if (epartIsReg(modrm
)) {
12447 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
12449 DIP("psign%s %s,%s\n", str
, nameMMXReg(eregOfRM(modrm
)),
12450 nameMMXReg(gregOfRM(modrm
)));
12452 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12453 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
12455 DIP("psign%s %s,%s\n", str
, dis_buf
,
12456 nameMMXReg(gregOfRM(modrm
)));
12461 dis_PSIGN_helper( mkexpr(sV
), mkexpr(dV
), laneszB
)
12463 goto decode_success
;
12466 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
12467 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
12468 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
12470 && insn
[0] == 0x0F && insn
[1] == 0x38
12471 && (insn
[2] == 0x08 || insn
[2] == 0x09 || insn
[2] == 0x0A)) {
12472 IRTemp sV
= newTemp(Ity_V128
);
12473 IRTemp dV
= newTemp(Ity_V128
);
12474 IRTemp sHi
= newTemp(Ity_I64
);
12475 IRTemp sLo
= newTemp(Ity_I64
);
12476 IRTemp dHi
= newTemp(Ity_I64
);
12477 IRTemp dLo
= newTemp(Ity_I64
);
12478 const HChar
* str
= "???";
12482 case 0x08: laneszB
= 1; str
= "b"; break;
12483 case 0x09: laneszB
= 2; str
= "w"; break;
12484 case 0x0A: laneszB
= 4; str
= "d"; break;
12485 default: vassert(0);
12489 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
12491 if (epartIsReg(modrm
)) {
12492 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
12494 DIP("psign%s %s,%s\n", str
, nameXMMReg(eregOfRM(modrm
)),
12495 nameXMMReg(gregOfRM(modrm
)));
12497 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12498 gen_SEGV_if_not_16_aligned( addr
);
12499 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12501 DIP("psign%s %s,%s\n", str
, dis_buf
,
12502 nameXMMReg(gregOfRM(modrm
)));
12505 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
12506 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
12507 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
12508 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
12512 binop(Iop_64HLtoV128
,
12513 dis_PSIGN_helper( mkexpr(sHi
), mkexpr(dHi
), laneszB
),
12514 dis_PSIGN_helper( mkexpr(sLo
), mkexpr(dLo
), laneszB
)
12517 goto decode_success
;
12520 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
12521 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
12522 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
12524 && insn
[0] == 0x0F && insn
[1] == 0x38
12525 && (insn
[2] == 0x1C || insn
[2] == 0x1D || insn
[2] == 0x1E)) {
12526 IRTemp sV
= newTemp(Ity_I64
);
12527 const HChar
* str
= "???";
12531 case 0x1C: laneszB
= 1; str
= "b"; break;
12532 case 0x1D: laneszB
= 2; str
= "w"; break;
12533 case 0x1E: laneszB
= 4; str
= "d"; break;
12534 default: vassert(0);
12540 if (epartIsReg(modrm
)) {
12541 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
12543 DIP("pabs%s %s,%s\n", str
, nameMMXReg(eregOfRM(modrm
)),
12544 nameMMXReg(gregOfRM(modrm
)));
12546 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12547 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
12549 DIP("pabs%s %s,%s\n", str
, dis_buf
,
12550 nameMMXReg(gregOfRM(modrm
)));
12555 dis_PABS_helper( mkexpr(sV
), laneszB
)
12557 goto decode_success
;
12560 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
12561 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
12562 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
12564 && insn
[0] == 0x0F && insn
[1] == 0x38
12565 && (insn
[2] == 0x1C || insn
[2] == 0x1D || insn
[2] == 0x1E)) {
12566 IRTemp sV
= newTemp(Ity_V128
);
12567 IRTemp sHi
= newTemp(Ity_I64
);
12568 IRTemp sLo
= newTemp(Ity_I64
);
12569 const HChar
* str
= "???";
12573 case 0x1C: laneszB
= 1; str
= "b"; break;
12574 case 0x1D: laneszB
= 2; str
= "w"; break;
12575 case 0x1E: laneszB
= 4; str
= "d"; break;
12576 default: vassert(0);
12581 if (epartIsReg(modrm
)) {
12582 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
12584 DIP("pabs%s %s,%s\n", str
, nameXMMReg(eregOfRM(modrm
)),
12585 nameXMMReg(gregOfRM(modrm
)));
12587 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12588 gen_SEGV_if_not_16_aligned( addr
);
12589 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12591 DIP("pabs%s %s,%s\n", str
, dis_buf
,
12592 nameXMMReg(gregOfRM(modrm
)));
12595 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
12596 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
12600 binop(Iop_64HLtoV128
,
12601 dis_PABS_helper( mkexpr(sHi
), laneszB
),
12602 dis_PABS_helper( mkexpr(sLo
), laneszB
)
12605 goto decode_success
;
12608 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
12610 && insn
[0] == 0x0F && insn
[1] == 0x3A && insn
[2] == 0x0F) {
12611 IRTemp sV
= newTemp(Ity_I64
);
12612 IRTemp dV
= newTemp(Ity_I64
);
12613 IRTemp res
= newTemp(Ity_I64
);
12617 assign( dV
, getMMXReg(gregOfRM(modrm
)) );
12619 if (epartIsReg(modrm
)) {
12620 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
12621 d32
= (UInt
)insn
[3+1];
12623 DIP("palignr $%u,%s,%s\n", d32
,
12624 nameMMXReg(eregOfRM(modrm
)),
12625 nameMMXReg(gregOfRM(modrm
)));
12627 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12628 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
12629 d32
= (UInt
)insn
[3+alen
];
12631 DIP("palignr $%u%s,%s\n", d32
,
12633 nameMMXReg(gregOfRM(modrm
)));
12637 assign( res
, mkexpr(sV
) );
12639 else if (d32
>= 1 && d32
<= 7) {
12642 binop(Iop_Shr64
, mkexpr(sV
), mkU8(8*d32
)),
12643 binop(Iop_Shl64
, mkexpr(dV
), mkU8(8*(8-d32
))
12646 else if (d32
== 8) {
12647 assign( res
, mkexpr(dV
) );
12649 else if (d32
>= 9 && d32
<= 15) {
12650 assign( res
, binop(Iop_Shr64
, mkexpr(dV
), mkU8(8*(d32
-8))) );
12652 else if (d32
>= 16 && d32
<= 255) {
12653 assign( res
, mkU64(0) );
12658 putMMXReg( gregOfRM(modrm
), mkexpr(res
) );
12659 goto decode_success
;
12662 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
12664 && insn
[0] == 0x0F && insn
[1] == 0x3A && insn
[2] == 0x0F) {
12665 IRTemp sV
= newTemp(Ity_V128
);
12666 IRTemp dV
= newTemp(Ity_V128
);
12667 IRTemp sHi
= newTemp(Ity_I64
);
12668 IRTemp sLo
= newTemp(Ity_I64
);
12669 IRTemp dHi
= newTemp(Ity_I64
);
12670 IRTemp dLo
= newTemp(Ity_I64
);
12671 IRTemp rHi
= newTemp(Ity_I64
);
12672 IRTemp rLo
= newTemp(Ity_I64
);
12675 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
12677 if (epartIsReg(modrm
)) {
12678 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
12679 d32
= (UInt
)insn
[3+1];
12681 DIP("palignr $%u,%s,%s\n", d32
,
12682 nameXMMReg(eregOfRM(modrm
)),
12683 nameXMMReg(gregOfRM(modrm
)));
12685 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12686 gen_SEGV_if_not_16_aligned( addr
);
12687 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12688 d32
= (UInt
)insn
[3+alen
];
12690 DIP("palignr $%u,%s,%s\n", d32
,
12692 nameXMMReg(gregOfRM(modrm
)));
12695 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
12696 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
12697 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
12698 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
12701 assign( rHi
, mkexpr(sHi
) );
12702 assign( rLo
, mkexpr(sLo
) );
12704 else if (d32
>= 1 && d32
<= 7) {
12705 assign( rHi
, dis_PALIGNR_XMM_helper(dLo
, sHi
, d32
) );
12706 assign( rLo
, dis_PALIGNR_XMM_helper(sHi
, sLo
, d32
) );
12708 else if (d32
== 8) {
12709 assign( rHi
, mkexpr(dLo
) );
12710 assign( rLo
, mkexpr(sHi
) );
12712 else if (d32
>= 9 && d32
<= 15) {
12713 assign( rHi
, dis_PALIGNR_XMM_helper(dHi
, dLo
, d32
-8) );
12714 assign( rLo
, dis_PALIGNR_XMM_helper(dLo
, sHi
, d32
-8) );
12716 else if (d32
== 16) {
12717 assign( rHi
, mkexpr(dHi
) );
12718 assign( rLo
, mkexpr(dLo
) );
12720 else if (d32
>= 17 && d32
<= 23) {
12721 assign( rHi
, binop(Iop_Shr64
, mkexpr(dHi
), mkU8(8*(d32
-16))) );
12722 assign( rLo
, dis_PALIGNR_XMM_helper(dHi
, dLo
, d32
-16) );
12724 else if (d32
== 24) {
12725 assign( rHi
, mkU64(0) );
12726 assign( rLo
, mkexpr(dHi
) );
12728 else if (d32
>= 25 && d32
<= 31) {
12729 assign( rHi
, mkU64(0) );
12730 assign( rLo
, binop(Iop_Shr64
, mkexpr(dHi
), mkU8(8*(d32
-24))) );
12732 else if (d32
>= 32 && d32
<= 255) {
12733 assign( rHi
, mkU64(0) );
12734 assign( rLo
, mkU64(0) );
12741 binop(Iop_64HLtoV128
, mkexpr(rHi
), mkexpr(rLo
))
12743 goto decode_success
;
12746 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
12748 && insn
[0] == 0x0F && insn
[1] == 0x38 && insn
[2] == 0x00) {
12749 IRTemp sV
= newTemp(Ity_I64
);
12750 IRTemp dV
= newTemp(Ity_I64
);
12754 assign( dV
, getMMXReg(gregOfRM(modrm
)) );
12756 if (epartIsReg(modrm
)) {
12757 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
12759 DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
12760 nameMMXReg(gregOfRM(modrm
)));
12762 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12763 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
12765 DIP("pshufb %s,%s\n", dis_buf
,
12766 nameMMXReg(gregOfRM(modrm
)));
12773 /* permute the lanes */
12777 binop(Iop_And64
, mkexpr(sV
), mkU64(0x0707070707070707ULL
))
12779 /* mask off lanes which have (index & 0x80) == 0x80 */
12780 unop(Iop_Not64
, binop(Iop_SarN8x8
, mkexpr(sV
), mkU8(7)))
12783 goto decode_success
;
12786 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
12788 && insn
[0] == 0x0F && insn
[1] == 0x38 && insn
[2] == 0x00) {
12789 IRTemp sV
= newTemp(Ity_V128
);
12790 IRTemp dV
= newTemp(Ity_V128
);
12791 IRTemp sHi
= newTemp(Ity_I64
);
12792 IRTemp sLo
= newTemp(Ity_I64
);
12793 IRTemp dHi
= newTemp(Ity_I64
);
12794 IRTemp dLo
= newTemp(Ity_I64
);
12795 IRTemp rHi
= newTemp(Ity_I64
);
12796 IRTemp rLo
= newTemp(Ity_I64
);
12797 IRTemp sevens
= newTemp(Ity_I64
);
12798 IRTemp mask0x80hi
= newTemp(Ity_I64
);
12799 IRTemp mask0x80lo
= newTemp(Ity_I64
);
12800 IRTemp maskBit3hi
= newTemp(Ity_I64
);
12801 IRTemp maskBit3lo
= newTemp(Ity_I64
);
12802 IRTemp sAnd7hi
= newTemp(Ity_I64
);
12803 IRTemp sAnd7lo
= newTemp(Ity_I64
);
12804 IRTemp permdHi
= newTemp(Ity_I64
);
12805 IRTemp permdLo
= newTemp(Ity_I64
);
12808 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
12810 if (epartIsReg(modrm
)) {
12811 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
12813 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
12814 nameXMMReg(gregOfRM(modrm
)));
12816 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12817 gen_SEGV_if_not_16_aligned( addr
);
12818 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12820 DIP("pshufb %s,%s\n", dis_buf
,
12821 nameXMMReg(gregOfRM(modrm
)));
12824 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
12825 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
12826 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
12827 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
12829 assign( sevens
, mkU64(0x0707070707070707ULL
) );
12832 mask0x80hi = Not(SarN8x8(sHi,7))
12833 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
12834 sAnd7hi = And(sHi,sevens)
12835 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
12836 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
12837 rHi = And(permdHi,mask0x80hi)
12841 unop(Iop_Not64
, binop(Iop_SarN8x8
,mkexpr(sHi
),mkU8(7))));
12846 binop(Iop_ShlN8x8
,mkexpr(sHi
),mkU8(4)),
12849 assign(sAnd7hi
, binop(Iop_And64
,mkexpr(sHi
),mkexpr(sevens
)));
12856 binop(Iop_Perm8x8
,mkexpr(dHi
),mkexpr(sAnd7hi
)),
12857 mkexpr(maskBit3hi
)),
12859 binop(Iop_Perm8x8
,mkexpr(dLo
),mkexpr(sAnd7hi
)),
12860 unop(Iop_Not64
,mkexpr(maskBit3hi
))) ));
12862 assign(rHi
, binop(Iop_And64
,mkexpr(permdHi
),mkexpr(mask0x80hi
)) );
12864 /* And the same for the lower half of the result. What fun. */
12868 unop(Iop_Not64
, binop(Iop_SarN8x8
,mkexpr(sLo
),mkU8(7))));
12873 binop(Iop_ShlN8x8
,mkexpr(sLo
),mkU8(4)),
12876 assign(sAnd7lo
, binop(Iop_And64
,mkexpr(sLo
),mkexpr(sevens
)));
12883 binop(Iop_Perm8x8
,mkexpr(dHi
),mkexpr(sAnd7lo
)),
12884 mkexpr(maskBit3lo
)),
12886 binop(Iop_Perm8x8
,mkexpr(dLo
),mkexpr(sAnd7lo
)),
12887 unop(Iop_Not64
,mkexpr(maskBit3lo
))) ));
12889 assign(rLo
, binop(Iop_And64
,mkexpr(permdLo
),mkexpr(mask0x80lo
)) );
12893 binop(Iop_64HLtoV128
, mkexpr(rHi
), mkexpr(rLo
))
12895 goto decode_success
;
12898 /* 0F 38 F0 = MOVBE m16/32(E), r16/32(G) */
12899 /* 0F 38 F1 = MOVBE r16/32(G), m16/32(E) */
12900 if ((sz
== 2 || sz
== 4)
12901 && insn
[0] == 0x0F && insn
[1] == 0x38
12902 && (insn
[2] == 0xF0 || insn
[2] == 0xF1)
12903 && !epartIsReg(insn
[3])) {
12906 addr
= disAMode(&alen
, sorb
, delta
+ 3, dis_buf
);
12909 IRTemp src
= newTemp(ty
);
12911 if (insn
[2] == 0xF0) { /* LOAD */
12912 assign(src
, loadLE(ty
, mkexpr(addr
)));
12913 IRTemp dst
= math_BSWAP(src
, ty
);
12914 putIReg(sz
, gregOfRM(modrm
), mkexpr(dst
));
12915 DIP("movbe %s,%s\n", dis_buf
, nameIReg(sz
, gregOfRM(modrm
)));
12916 } else { /* STORE */
12917 assign(src
, getIReg(sz
, gregOfRM(modrm
)));
12918 IRTemp dst
= math_BSWAP(src
, ty
);
12919 storeLE(mkexpr(addr
), mkexpr(dst
));
12920 DIP("movbe %s,%s\n", nameIReg(sz
, gregOfRM(modrm
)), dis_buf
);
12922 goto decode_success
;
12925 /* ---------------------------------------------------- */
12926 /* --- end of the SSSE3 decoder. --- */
12927 /* ---------------------------------------------------- */
12929 /* ---------------------------------------------------- */
12930 /* --- start of the SSE4 decoder --- */
12931 /* ---------------------------------------------------- */
12933 /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
12934 (Partial implementation only -- only deal with cases where
12935 the rounding mode is specified directly by the immediate byte.)
12936 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
12937 (Limitations ditto)
12940 && insn
[0] == 0x0F && insn
[1] == 0x3A
12941 && (insn
[2] == 0x0B || insn
[2] == 0x0A)) {
12943 Bool isD
= insn
[2] == 0x0B;
12944 IRTemp src
= newTemp(isD
? Ity_F64
: Ity_F32
);
12945 IRTemp res
= newTemp(isD
? Ity_F64
: Ity_F32
);
12950 if (epartIsReg(modrm
)) {
12952 isD
? getXMMRegLane64F( eregOfRM(modrm
), 0 )
12953 : getXMMRegLane32F( eregOfRM(modrm
), 0 ) );
12955 if (imm
& ~3) goto decode_failure
;
12957 DIP( "rounds%c $%d,%s,%s\n",
12959 imm
, nameXMMReg( eregOfRM(modrm
) ),
12960 nameXMMReg( gregOfRM(modrm
) ) );
12962 addr
= disAMode( &alen
, sorb
, delta
+3, dis_buf
);
12963 assign( src
, loadLE( isD
? Ity_F64
: Ity_F32
, mkexpr(addr
) ));
12964 imm
= insn
[3+alen
];
12965 if (imm
& ~3) goto decode_failure
;
12967 DIP( "roundsd $%d,%s,%s\n",
12968 imm
, dis_buf
, nameXMMReg( gregOfRM(modrm
) ) );
12971 /* (imm & 3) contains an Intel-encoded rounding mode. Because
12972 that encoding is the same as the encoding for IRRoundingMode,
12973 we can use that value directly in the IR as a rounding
12975 assign(res
, binop(isD
? Iop_RoundF64toInt
: Iop_RoundF32toInt
,
12976 mkU32(imm
& 3), mkexpr(src
)) );
12979 putXMMRegLane64F( gregOfRM(modrm
), 0, mkexpr(res
) );
12981 putXMMRegLane32F( gregOfRM(modrm
), 0, mkexpr(res
) );
12983 goto decode_success
;
12986 /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension,
12987 which we can only decode if we're sure this is an AMD cpu that
12988 supports LZCNT, since otherwise it's BSR, which behaves
12990 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0xBD
12991 && 0 != (archinfo
->hwcaps
& VEX_HWCAPS_X86_LZCNT
)) {
12992 vassert(sz
== 2 || sz
== 4);
12993 /*IRType*/ ty
= szToITy(sz
);
12994 IRTemp src
= newTemp(ty
);
12996 if (epartIsReg(modrm
)) {
12997 assign(src
, getIReg(sz
, eregOfRM(modrm
)));
12999 DIP("lzcnt%c %s, %s\n", nameISize(sz
),
13000 nameIReg(sz
, eregOfRM(modrm
)),
13001 nameIReg(sz
, gregOfRM(modrm
)));
13003 addr
= disAMode( &alen
, sorb
, delta
+3, dis_buf
);
13004 assign(src
, loadLE(ty
, mkexpr(addr
)));
13006 DIP("lzcnt%c %s, %s\n", nameISize(sz
), dis_buf
,
13007 nameIReg(sz
, gregOfRM(modrm
)));
13010 IRTemp res
= gen_LZCNT(ty
, src
);
13011 putIReg(sz
, gregOfRM(modrm
), mkexpr(res
));
13013 // Update flags. This is pretty lame .. perhaps can do better
13014 // if this turns out to be performance critical.
13015 // O S A P are cleared. Z is set if RESULT == 0.
13016 // C is set if SRC is zero.
13017 IRTemp src32
= newTemp(Ity_I32
);
13018 IRTemp res32
= newTemp(Ity_I32
);
13019 assign(src32
, widenUto32(mkexpr(src
)));
13020 assign(res32
, widenUto32(mkexpr(res
)));
13022 IRTemp oszacp
= newTemp(Ity_I32
);
13028 binop(Iop_CmpEQ32
, mkexpr(res32
), mkU32(0))),
13029 mkU8(X86G_CC_SHIFT_Z
)),
13032 binop(Iop_CmpEQ32
, mkexpr(src32
), mkU32(0))),
13033 mkU8(X86G_CC_SHIFT_C
))
13037 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
13038 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
13039 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
13040 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(oszacp
) ));
13042 goto decode_success
;
13045 /* ---------------------------------------------------- */
13046 /* --- end of the SSE4 decoder --- */
13047 /* ---------------------------------------------------- */
13049 after_sse_decoders
:
13051 /* ---------------------------------------------------- */
13052 /* --- deal with misc 0x67 pfxs (addr size override) -- */
13053 /* ---------------------------------------------------- */
13055 /* 67 E3 = JCXZ (for JECXZ see below) */
13056 if (insn
[0] == 0x67 && insn
[1] == 0xE3 && sz
== 4) {
13058 d32
= (((Addr32
)guest_EIP_bbstart
)+delta
+1) + getSDisp8(delta
);
13061 binop(Iop_CmpEQ16
, getIReg(2,R_ECX
), mkU16(0)),
13066 DIP("jcxz 0x%x\n", d32
);
13067 goto decode_success
;
13070 /* 67 E8 = CALL with redundant addr16 prefix */
13071 if (insn
[0] == 0x67 && insn
[1] == 0xE8) {
13075 /* ---------------------------------------------------- */
13076 /* --- start of the baseline insn decoder -- */
13077 /* ---------------------------------------------------- */
13079 /* Get the primary opcode. */
13080 opc
= getIByte(delta
); delta
++;
13082 /* We get here if the current insn isn't SSE, or this CPU doesn't
13087 /* ------------------------ Control flow --------------- */
13089 case 0xC2: /* RET imm16 */
13090 d32
= getUDisp16(delta
);
13092 dis_ret(&dres
, d32
);
13093 DIP("ret %u\n", d32
);
13095 case 0xC3: /* RET */
13100 case 0xCF: /* IRET */
13101 /* Note, this is an extremely kludgey and limited implementation
13102 of iret. All it really does is:
13103 popl %EIP; popl %CS; popl %EFLAGS.
13104 %CS is set but ignored (as it is in (eg) popw %cs). */
13105 t1
= newTemp(Ity_I32
); /* ESP */
13106 t2
= newTemp(Ity_I32
); /* new EIP */
13107 t3
= newTemp(Ity_I32
); /* new CS */
13108 t4
= newTemp(Ity_I32
); /* new EFLAGS */
13109 assign(t1
, getIReg(4,R_ESP
));
13110 assign(t2
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t1
),mkU32(0) )));
13111 assign(t3
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t1
),mkU32(4) )));
13112 assign(t4
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t1
),mkU32(8) )));
13113 /* Get stuff off stack */
13114 putIReg(4, R_ESP
,binop(Iop_Add32
, mkexpr(t1
), mkU32(12)));
13115 /* set %CS (which is ignored anyway) */
13116 putSReg( R_CS
, unop(Iop_32to16
, mkexpr(t3
)) );
13118 set_EFLAGS_from_value( t4
, False
/*!emit_AC_emwarn*/, 0/*unused*/ );
13119 /* goto new EIP value */
13120 jmp_treg(&dres
, Ijk_Ret
, t2
);
13121 vassert(dres
.whatNext
== Dis_StopHere
);
13122 DIP("iret (very kludgey)\n");
13125 case 0xE8: /* CALL J4 */
13126 d32
= getUDisp32(delta
); delta
+= 4;
13127 d32
+= (guest_EIP_bbstart
+delta
);
13128 /* (guest_eip_bbstart+delta) == return-to addr, d32 == call-to addr */
13129 if (d32
== guest_EIP_bbstart
+delta
&& getIByte(delta
) >= 0x58
13130 && getIByte(delta
) <= 0x5F) {
13131 /* Specially treat the position-independent-code idiom
13136 since this generates better code, but for no other reason. */
13137 Int archReg
= getIByte(delta
) - 0x58;
13138 /* vex_printf("-- fPIC thingy\n"); */
13139 putIReg(4, archReg
, mkU32(guest_EIP_bbstart
+delta
));
13140 delta
++; /* Step over the POP */
13141 DIP("call 0x%x ; popl %s\n",d32
,nameIReg(4,archReg
));
13143 /* The normal sequence for a call. */
13144 t1
= newTemp(Ity_I32
);
13145 assign(t1
, binop(Iop_Sub32
, getIReg(4,R_ESP
), mkU32(4)));
13146 putIReg(4, R_ESP
, mkexpr(t1
));
13147 storeLE( mkexpr(t1
), mkU32(guest_EIP_bbstart
+delta
));
13148 if (resteerOkFn( callback_opaque
, (Addr32
)d32
)) {
13149 /* follow into the call target. */
13150 dres
.whatNext
= Dis_ResteerU
;
13151 dres
.continueAt
= (Addr32
)d32
;
13153 jmp_lit(&dres
, Ijk_Call
, d32
);
13154 vassert(dres
.whatNext
== Dis_StopHere
);
13156 DIP("call 0x%x\n",d32
);
13160 //-- case 0xC8: /* ENTER */
13161 //-- d32 = getUDisp16(eip); eip += 2;
13162 //-- abyte = getIByte(delta); delta++;
13164 //-- vg_assert(sz == 4);
13165 //-- vg_assert(abyte == 0);
13167 //-- t1 = newTemp(cb); t2 = newTemp(cb);
13168 //-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1);
13169 //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2);
13170 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
13171 //-- uLiteral(cb, sz);
13172 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
13173 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
13174 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP);
13176 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
13177 //-- uLiteral(cb, d32);
13178 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
13180 //-- DIP("enter 0x%x, 0x%x", d32, abyte);
13183 case 0xC9: /* LEAVE */
13185 t1
= newTemp(Ity_I32
); t2
= newTemp(Ity_I32
);
13186 assign(t1
, getIReg(4,R_EBP
));
13187 /* First PUT ESP looks redundant, but need it because ESP must
13188 always be up-to-date for Memcheck to work... */
13189 putIReg(4, R_ESP
, mkexpr(t1
));
13190 assign(t2
, loadLE(Ity_I32
,mkexpr(t1
)));
13191 putIReg(4, R_EBP
, mkexpr(t2
));
13192 putIReg(4, R_ESP
, binop(Iop_Add32
, mkexpr(t1
), mkU32(4)) );
13196 /* ---------------- Misc weird-ass insns --------------- */
13198 case 0x27: /* DAA */
13199 case 0x2F: /* DAS */
13200 case 0x37: /* AAA */
13201 case 0x3F: /* AAS */
13202 /* An ugly implementation for some ugly instructions. Oh
13204 if (sz
!= 4) goto decode_failure
;
13205 t1
= newTemp(Ity_I32
);
13206 t2
= newTemp(Ity_I32
);
13207 /* Make up a 32-bit value (t1), with the old value of AX in the
13208 bottom 16 bits, and the old OSZACP bitmask in the upper 16
13211 binop(Iop_16HLto32
,
13213 mk_x86g_calculate_eflags_all()),
13216 /* Call the helper fn, to get a new AX and OSZACP value, and
13217 poke both back into the guest state. Also pass the helper
13218 the actual opcode so it knows which of the 4 instructions it
13219 is doing the computation for. */
13220 vassert(opc
== 0x27 || opc
== 0x2F || opc
== 0x37 || opc
== 0x3F);
13223 Ity_I32
, 0/*regparm*/, "x86g_calculate_daa_das_aaa_aas",
13224 &x86g_calculate_daa_das_aaa_aas
,
13225 mkIRExprVec_2( mkexpr(t1
), mkU32( opc
& 0xFF) )
13227 putIReg(2, R_EAX
, unop(Iop_32to16
, mkexpr(t2
) ));
13229 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
13230 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
13231 stmt( IRStmt_Put( OFFB_CC_DEP1
,
13233 binop(Iop_Shr32
, mkexpr(t2
), mkU8(16)),
13234 mkU32( X86G_CC_MASK_C
| X86G_CC_MASK_P
13235 | X86G_CC_MASK_A
| X86G_CC_MASK_Z
13236 | X86G_CC_MASK_S
| X86G_CC_MASK_O
)
13240 /* Set NDEP even though it isn't used. This makes redundant-PUT
13241 elimination of previous stores to this field work better. */
13242 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
13244 case 0x27: DIP("daa\n"); break;
13245 case 0x2F: DIP("das\n"); break;
13246 case 0x37: DIP("aaa\n"); break;
13247 case 0x3F: DIP("aas\n"); break;
13248 default: vassert(0);
13252 case 0xD4: /* AAM */
13253 case 0xD5: /* AAD */
13254 d32
= getIByte(delta
); delta
++;
13255 if (sz
!= 4 || d32
!= 10) goto decode_failure
;
13256 t1
= newTemp(Ity_I32
);
13257 t2
= newTemp(Ity_I32
);
13258 /* Make up a 32-bit value (t1), with the old value of AX in the
13259 bottom 16 bits, and the old OSZACP bitmask in the upper 16
13262 binop(Iop_16HLto32
,
13264 mk_x86g_calculate_eflags_all()),
13267 /* Call the helper fn, to get a new AX and OSZACP value, and
13268 poke both back into the guest state. Also pass the helper
13269 the actual opcode so it knows which of the 2 instructions it
13270 is doing the computation for. */
13273 Ity_I32
, 0/*regparm*/, "x86g_calculate_aad_aam",
13274 &x86g_calculate_aad_aam
,
13275 mkIRExprVec_2( mkexpr(t1
), mkU32( opc
& 0xFF) )
13277 putIReg(2, R_EAX
, unop(Iop_32to16
, mkexpr(t2
) ));
13279 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
13280 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
13281 stmt( IRStmt_Put( OFFB_CC_DEP1
,
13283 binop(Iop_Shr32
, mkexpr(t2
), mkU8(16)),
13284 mkU32( X86G_CC_MASK_C
| X86G_CC_MASK_P
13285 | X86G_CC_MASK_A
| X86G_CC_MASK_Z
13286 | X86G_CC_MASK_S
| X86G_CC_MASK_O
)
13290 /* Set NDEP even though it isn't used. This makes
13291 redundant-PUT elimination of previous stores to this field
13293 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
13295 DIP(opc
== 0xD4 ? "aam\n" : "aad\n");
13298 /* ------------------------ CWD/CDQ -------------------- */
13300 case 0x98: /* CBW */
13302 putIReg(4, R_EAX
, unop(Iop_16Sto32
, getIReg(2, R_EAX
)));
13306 putIReg(2, R_EAX
, unop(Iop_8Sto16
, getIReg(1, R_EAX
)));
13311 case 0x99: /* CWD/CDQ */
13314 binop(mkSizedOp(ty
,Iop_Sar8
),
13315 getIReg(sz
, R_EAX
),
13316 mkU8(sz
== 2 ? 15 : 31)) );
13317 DIP(sz
== 2 ? "cwdq\n" : "cdqq\n");
13320 /* ------------------------ FPU ops -------------------- */
13322 case 0x9E: /* SAHF */
13327 case 0x9F: /* LAHF */
13332 case 0x9B: /* FWAIT */
13345 Int delta0
= delta
;
13346 Bool decode_OK
= False
;
13347 delta
= dis_FPU ( &decode_OK
, sorb
, delta
);
13350 goto decode_failure
;
13355 /* ------------------------ INC & DEC ------------------ */
13357 case 0x40: /* INC eAX */
13358 case 0x41: /* INC eCX */
13359 case 0x42: /* INC eDX */
13360 case 0x43: /* INC eBX */
13361 case 0x44: /* INC eSP */
13362 case 0x45: /* INC eBP */
13363 case 0x46: /* INC eSI */
13364 case 0x47: /* INC eDI */
13365 vassert(sz
== 2 || sz
== 4);
13368 assign( t1
, binop(mkSizedOp(ty
,Iop_Add8
),
13369 getIReg(sz
, (UInt
)(opc
- 0x40)),
13371 setFlags_INC_DEC( True
, t1
, ty
);
13372 putIReg(sz
, (UInt
)(opc
- 0x40), mkexpr(t1
));
13373 DIP("inc%c %s\n", nameISize(sz
), nameIReg(sz
,opc
-0x40));
13376 case 0x48: /* DEC eAX */
13377 case 0x49: /* DEC eCX */
13378 case 0x4A: /* DEC eDX */
13379 case 0x4B: /* DEC eBX */
13380 case 0x4C: /* DEC eSP */
13381 case 0x4D: /* DEC eBP */
13382 case 0x4E: /* DEC eSI */
13383 case 0x4F: /* DEC eDI */
13384 vassert(sz
== 2 || sz
== 4);
13387 assign( t1
, binop(mkSizedOp(ty
,Iop_Sub8
),
13388 getIReg(sz
, (UInt
)(opc
- 0x48)),
13390 setFlags_INC_DEC( False
, t1
, ty
);
13391 putIReg(sz
, (UInt
)(opc
- 0x48), mkexpr(t1
));
13392 DIP("dec%c %s\n", nameISize(sz
), nameIReg(sz
,opc
-0x48));
13395 /* ------------------------ INT ------------------------ */
13397 case 0xCC: /* INT 3 */
13398 jmp_lit(&dres
, Ijk_SigTRAP
, ((Addr32
)guest_EIP_bbstart
)+delta
);
13399 vassert(dres
.whatNext
== Dis_StopHere
);
13403 case 0xCD: /* INT imm8 */
13404 d32
= getIByte(delta
); delta
++;
13406 /* For any of the cases where we emit a jump (that is, for all
13407 currently handled cases), it's important that all ArchRegs
13408 carry their up-to-date value at this point. So we declare an
13409 end-of-block here, which forces any TempRegs caching ArchRegs
13412 /* Handle int $0x3F .. $0x4F by synthesising a segfault and a
13413 restart of this instruction (hence the "-2" two lines below,
13414 to get the restart EIP to be this instruction. This is
13415 probably Linux-specific and it would be more correct to only
13416 do this if the VexAbiInfo says that is what we should do.
13417 This used to handle just 0x40-0x43; Jikes RVM uses a larger
13418 range (0x3F-0x49), and this allows some slack as well. */
13419 if (d32
>= 0x3F && d32
<= 0x4F) {
13420 jmp_lit(&dres
, Ijk_SigSEGV
, ((Addr32
)guest_EIP_bbstart
)+delta
-2);
13421 vassert(dres
.whatNext
== Dis_StopHere
);
13422 DIP("int $0x%x\n", d32
);
13426 /* Handle int $0x80 (linux syscalls), int $0x81 and $0x82
13427 (darwin syscalls), int $0x91 (Solaris syscalls) and int $0xD2
13428 (Solaris fasttrap syscalls). As part of this, note where we are, so we
13429 can back up the guest to this point if the syscall needs to
13431 IRJumpKind jump_kind
;
13434 jump_kind
= Ijk_Sys_int128
;
13437 jump_kind
= Ijk_Sys_int129
;
13440 jump_kind
= Ijk_Sys_int130
;
13443 jump_kind
= Ijk_Sys_int145
;
13446 jump_kind
= Ijk_Sys_int210
;
13449 /* none of the above */
13450 goto decode_failure
;
13453 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL
,
13454 mkU32(guest_EIP_curr_instr
) ) );
13455 jmp_lit(&dres
, jump_kind
, ((Addr32
)guest_EIP_bbstart
)+delta
);
13456 vassert(dres
.whatNext
== Dis_StopHere
);
13457 DIP("int $0x%x\n", d32
);
13460 /* ------------------------ Jcond, byte offset --------- */
13462 case 0xEB: /* Jb (jump, byte offset) */
13463 d32
= (((Addr32
)guest_EIP_bbstart
)+delta
+1) + getSDisp8(delta
);
13465 if (resteerOkFn( callback_opaque
, (Addr32
)d32
) ) {
13466 dres
.whatNext
= Dis_ResteerU
;
13467 dres
.continueAt
= (Addr32
)d32
;
13469 jmp_lit(&dres
, Ijk_Boring
, d32
);
13470 vassert(dres
.whatNext
== Dis_StopHere
);
13472 DIP("jmp-8 0x%x\n", d32
);
13475 case 0xE9: /* Jv (jump, 16/32 offset) */
13476 vassert(sz
== 4); /* JRS added 2004 July 11 */
13477 d32
= (((Addr32
)guest_EIP_bbstart
)+delta
+sz
) + getSDisp(sz
,delta
);
13479 if (resteerOkFn( callback_opaque
, (Addr32
)d32
) ) {
13480 dres
.whatNext
= Dis_ResteerU
;
13481 dres
.continueAt
= (Addr32
)d32
;
13483 jmp_lit(&dres
, Ijk_Boring
, d32
);
13484 vassert(dres
.whatNext
== Dis_StopHere
);
13486 DIP("jmp 0x%x\n", d32
);
13491 case 0x72: /* JBb/JNAEb (jump below) */
13492 case 0x73: /* JNBb/JAEb (jump not below) */
13493 case 0x74: /* JZb/JEb (jump zero) */
13494 case 0x75: /* JNZb/JNEb (jump not zero) */
13495 case 0x76: /* JBEb/JNAb (jump below or equal) */
13496 case 0x77: /* JNBEb/JAb (jump not below or equal) */
13497 case 0x78: /* JSb (jump negative) */
13498 case 0x79: /* JNSb (jump not negative) */
13499 case 0x7A: /* JP (jump parity even) */
13500 case 0x7B: /* JNP/JPO (jump parity odd) */
13501 case 0x7C: /* JLb/JNGEb (jump less) */
13502 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
13503 case 0x7E: /* JLEb/JNGb (jump less or equal) */
13504 case 0x7F: /* JGb/JNLEb (jump greater) */
13506 const HChar
* comment
= "";
13507 jmpDelta
= (Int
)getSDisp8(delta
);
13508 vassert(-128 <= jmpDelta
&& jmpDelta
< 128);
13509 d32
= (((Addr32
)guest_EIP_bbstart
)+delta
+1) + jmpDelta
;
13512 && vex_control
.guest_chase_cond
13513 && (Addr32
)d32
!= (Addr32
)guest_EIP_bbstart
13515 && resteerOkFn( callback_opaque
, (Addr32
)d32
) ) {
13516 /* Speculation: assume this backward branch is taken. So we
13517 need to emit a side-exit to the insn following this one,
13518 on the negation of the condition, and continue at the
13519 branch target address (d32). If we wind up back at the
13520 first instruction of the trace, just stop; it's better to
13521 let the IR loop unroller handle that case. */
13523 mk_x86g_calculate_condition((X86Condcode
)(1 ^ (opc
- 0x70))),
13525 IRConst_U32(guest_EIP_bbstart
+delta
),
13527 dres
.whatNext
= Dis_ResteerC
;
13528 dres
.continueAt
= (Addr32
)d32
;
13529 comment
= "(assumed taken)";
13533 && vex_control
.guest_chase_cond
13534 && (Addr32
)d32
!= (Addr32
)guest_EIP_bbstart
13536 && resteerOkFn( callback_opaque
,
13537 (Addr32
)(guest_EIP_bbstart
+delta
)) ) {
13538 /* Speculation: assume this forward branch is not taken. So
13539 we need to emit a side-exit to d32 (the dest) and continue
13540 disassembling at the insn immediately following this
13543 mk_x86g_calculate_condition((X86Condcode
)(opc
- 0x70)),
13547 dres
.whatNext
= Dis_ResteerC
;
13548 dres
.continueAt
= guest_EIP_bbstart
+ delta
;
13549 comment
= "(assumed not taken)";
13552 /* Conservative default translation - end the block at this
13554 jcc_01( &dres
, (X86Condcode
)(opc
- 0x70),
13555 (Addr32
)(guest_EIP_bbstart
+delta
), d32
);
13556 vassert(dres
.whatNext
== Dis_StopHere
);
13558 DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc
- 0x70), d32
, comment
);
13562 case 0xE3: /* JECXZ (for JCXZ see above) */
13563 if (sz
!= 4) goto decode_failure
;
13564 d32
= (((Addr32
)guest_EIP_bbstart
)+delta
+1) + getSDisp8(delta
);
13567 binop(Iop_CmpEQ32
, getIReg(4,R_ECX
), mkU32(0)),
13572 DIP("jecxz 0x%x\n", d32
);
13575 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
13576 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
13577 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
13578 { /* Again, the docs say this uses ECX/CX as a count depending on
13579 the address size override, not the operand one. Since we
13580 don't handle address size overrides, I guess that means
13582 IRExpr
* zbit
= NULL
;
13583 IRExpr
* count
= NULL
;
13584 IRExpr
* cond
= NULL
;
13585 const HChar
* xtra
= NULL
;
13587 if (sz
!= 4) goto decode_failure
;
13588 d32
= (((Addr32
)guest_EIP_bbstart
)+delta
+1) + getSDisp8(delta
);
13590 putIReg(4, R_ECX
, binop(Iop_Sub32
, getIReg(4,R_ECX
), mkU32(1)));
13592 count
= getIReg(4,R_ECX
);
13593 cond
= binop(Iop_CmpNE32
, count
, mkU32(0));
13600 zbit
= mk_x86g_calculate_condition( X86CondZ
);
13601 cond
= mkAnd1(cond
, zbit
);
13605 zbit
= mk_x86g_calculate_condition( X86CondNZ
);
13606 cond
= mkAnd1(cond
, zbit
);
13611 stmt( IRStmt_Exit(cond
, Ijk_Boring
, IRConst_U32(d32
), OFFB_EIP
) );
13613 DIP("loop%s 0x%x\n", xtra
, d32
);
13617 /* ------------------------ IMUL ----------------------- */
13619 case 0x69: /* IMUL Iv, Ev, Gv */
13620 delta
= dis_imul_I_E_G ( sorb
, sz
, delta
, sz
);
13622 case 0x6B: /* IMUL Ib, Ev, Gv */
13623 delta
= dis_imul_I_E_G ( sorb
, sz
, delta
, 1 );
13626 /* ------------------------ MOV ------------------------ */
13628 case 0x88: /* MOV Gb,Eb */
13629 delta
= dis_mov_G_E(sorb
, 1, delta
);
13632 case 0x89: /* MOV Gv,Ev */
13633 delta
= dis_mov_G_E(sorb
, sz
, delta
);
13636 case 0x8A: /* MOV Eb,Gb */
13637 delta
= dis_mov_E_G(sorb
, 1, delta
);
13640 case 0x8B: /* MOV Ev,Gv */
13641 delta
= dis_mov_E_G(sorb
, sz
, delta
);
13644 case 0x8D: /* LEA M,Gv */
13646 goto decode_failure
;
13647 modrm
= getIByte(delta
);
13648 if (epartIsReg(modrm
))
13649 goto decode_failure
;
13650 /* NOTE! this is the one place where a segment override prefix
13651 has no effect on the address calculation. Therefore we pass
13652 zero instead of sorb here. */
13653 addr
= disAMode ( &alen
, /*sorb*/ 0, delta
, dis_buf
);
13655 putIReg(sz
, gregOfRM(modrm
), mkexpr(addr
));
13656 DIP("lea%c %s, %s\n", nameISize(sz
), dis_buf
,
13657 nameIReg(sz
,gregOfRM(modrm
)));
13660 case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */
13661 delta
= dis_mov_Sw_Ew(sorb
, sz
, delta
);
13664 case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */
13665 delta
= dis_mov_Ew_Sw(sorb
, delta
);
13668 case 0xA0: /* MOV Ob,AL */
13670 /* Fall through ... */
13671 case 0xA1: /* MOV Ov,eAX */
13672 d32
= getUDisp32(delta
); delta
+= 4;
13674 addr
= newTemp(Ity_I32
);
13675 assign( addr
, handleSegOverride(sorb
, mkU32(d32
)) );
13676 putIReg(sz
, R_EAX
, loadLE(ty
, mkexpr(addr
)));
13677 DIP("mov%c %s0x%x, %s\n", nameISize(sz
), sorbTxt(sorb
),
13678 d32
, nameIReg(sz
,R_EAX
));
13681 case 0xA2: /* MOV Ob,AL */
13683 /* Fall through ... */
13684 case 0xA3: /* MOV eAX,Ov */
13685 d32
= getUDisp32(delta
); delta
+= 4;
13687 addr
= newTemp(Ity_I32
);
13688 assign( addr
, handleSegOverride(sorb
, mkU32(d32
)) );
13689 storeLE( mkexpr(addr
), getIReg(sz
,R_EAX
) );
13690 DIP("mov%c %s, %s0x%x\n", nameISize(sz
), nameIReg(sz
,R_EAX
),
13691 sorbTxt(sorb
), d32
);
13694 case 0xB0: /* MOV imm,AL */
13695 case 0xB1: /* MOV imm,CL */
13696 case 0xB2: /* MOV imm,DL */
13697 case 0xB3: /* MOV imm,BL */
13698 case 0xB4: /* MOV imm,AH */
13699 case 0xB5: /* MOV imm,CH */
13700 case 0xB6: /* MOV imm,DH */
13701 case 0xB7: /* MOV imm,BH */
13702 d32
= getIByte(delta
); delta
+= 1;
13703 putIReg(1, opc
-0xB0, mkU8(d32
));
13704 DIP("movb $0x%x,%s\n", d32
, nameIReg(1,opc
-0xB0));
13707 case 0xB8: /* MOV imm,eAX */
13708 case 0xB9: /* MOV imm,eCX */
13709 case 0xBA: /* MOV imm,eDX */
13710 case 0xBB: /* MOV imm,eBX */
13711 case 0xBC: /* MOV imm,eSP */
13712 case 0xBD: /* MOV imm,eBP */
13713 case 0xBE: /* MOV imm,eSI */
13714 case 0xBF: /* MOV imm,eDI */
13715 d32
= getUDisp(sz
,delta
); delta
+= sz
;
13716 putIReg(sz
, opc
-0xB8, mkU(szToITy(sz
), d32
));
13717 DIP("mov%c $0x%x,%s\n", nameISize(sz
), d32
, nameIReg(sz
,opc
-0xB8));
13720 case 0xC6: /* C6 /0 = MOV Ib,Eb */
13722 goto maybe_do_Mov_I_E
;
13723 case 0xC7: /* C7 /0 = MOV Iv,Ev */
13724 goto maybe_do_Mov_I_E
;
13727 modrm
= getIByte(delta
);
13728 if (gregOfRM(modrm
) == 0) {
13729 if (epartIsReg(modrm
)) {
13730 delta
++; /* mod/rm byte */
13731 d32
= getUDisp(sz
,delta
); delta
+= sz
;
13732 putIReg(sz
, eregOfRM(modrm
), mkU(szToITy(sz
), d32
));
13733 DIP("mov%c $0x%x, %s\n", nameISize(sz
), d32
,
13734 nameIReg(sz
,eregOfRM(modrm
)));
13736 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
13738 d32
= getUDisp(sz
,delta
); delta
+= sz
;
13739 storeLE(mkexpr(addr
), mkU(szToITy(sz
), d32
));
13740 DIP("mov%c $0x%x, %s\n", nameISize(sz
), d32
, dis_buf
);
13744 goto decode_failure
;
13746 /* ------------------------ opl imm, A ----------------- */
13748 case 0x04: /* ADD Ib, AL */
13749 delta
= dis_op_imm_A( 1, False
, Iop_Add8
, True
, delta
, "add" );
13751 case 0x05: /* ADD Iv, eAX */
13752 delta
= dis_op_imm_A( sz
, False
, Iop_Add8
, True
, delta
, "add" );
13755 case 0x0C: /* OR Ib, AL */
13756 delta
= dis_op_imm_A( 1, False
, Iop_Or8
, True
, delta
, "or" );
13758 case 0x0D: /* OR Iv, eAX */
13759 delta
= dis_op_imm_A( sz
, False
, Iop_Or8
, True
, delta
, "or" );
13762 case 0x14: /* ADC Ib, AL */
13763 delta
= dis_op_imm_A( 1, True
, Iop_Add8
, True
, delta
, "adc" );
13765 case 0x15: /* ADC Iv, eAX */
13766 delta
= dis_op_imm_A( sz
, True
, Iop_Add8
, True
, delta
, "adc" );
13769 case 0x1C: /* SBB Ib, AL */
13770 delta
= dis_op_imm_A( 1, True
, Iop_Sub8
, True
, delta
, "sbb" );
13772 case 0x1D: /* SBB Iv, eAX */
13773 delta
= dis_op_imm_A( sz
, True
, Iop_Sub8
, True
, delta
, "sbb" );
13776 case 0x24: /* AND Ib, AL */
13777 delta
= dis_op_imm_A( 1, False
, Iop_And8
, True
, delta
, "and" );
13779 case 0x25: /* AND Iv, eAX */
13780 delta
= dis_op_imm_A( sz
, False
, Iop_And8
, True
, delta
, "and" );
13783 case 0x2C: /* SUB Ib, AL */
13784 delta
= dis_op_imm_A( 1, False
, Iop_Sub8
, True
, delta
, "sub" );
13786 case 0x2D: /* SUB Iv, eAX */
13787 delta
= dis_op_imm_A( sz
, False
, Iop_Sub8
, True
, delta
, "sub" );
13790 case 0x34: /* XOR Ib, AL */
13791 delta
= dis_op_imm_A( 1, False
, Iop_Xor8
, True
, delta
, "xor" );
13793 case 0x35: /* XOR Iv, eAX */
13794 delta
= dis_op_imm_A( sz
, False
, Iop_Xor8
, True
, delta
, "xor" );
13797 case 0x3C: /* CMP Ib, AL */
13798 delta
= dis_op_imm_A( 1, False
, Iop_Sub8
, False
, delta
, "cmp" );
13800 case 0x3D: /* CMP Iv, eAX */
13801 delta
= dis_op_imm_A( sz
, False
, Iop_Sub8
, False
, delta
, "cmp" );
13804 case 0xA8: /* TEST Ib, AL */
13805 delta
= dis_op_imm_A( 1, False
, Iop_And8
, False
, delta
, "test" );
13807 case 0xA9: /* TEST Iv, eAX */
13808 delta
= dis_op_imm_A( sz
, False
, Iop_And8
, False
, delta
, "test" );
13811 /* ------------------------ opl Ev, Gv ----------------- */
13813 case 0x02: /* ADD Eb,Gb */
13814 delta
= dis_op2_E_G ( sorb
, False
, Iop_Add8
, True
, 1, delta
, "add" );
13816 case 0x03: /* ADD Ev,Gv */
13817 delta
= dis_op2_E_G ( sorb
, False
, Iop_Add8
, True
, sz
, delta
, "add" );
13820 case 0x0A: /* OR Eb,Gb */
13821 delta
= dis_op2_E_G ( sorb
, False
, Iop_Or8
, True
, 1, delta
, "or" );
13823 case 0x0B: /* OR Ev,Gv */
13824 delta
= dis_op2_E_G ( sorb
, False
, Iop_Or8
, True
, sz
, delta
, "or" );
13827 case 0x12: /* ADC Eb,Gb */
13828 delta
= dis_op2_E_G ( sorb
, True
, Iop_Add8
, True
, 1, delta
, "adc" );
13830 case 0x13: /* ADC Ev,Gv */
13831 delta
= dis_op2_E_G ( sorb
, True
, Iop_Add8
, True
, sz
, delta
, "adc" );
13834 case 0x1A: /* SBB Eb,Gb */
13835 delta
= dis_op2_E_G ( sorb
, True
, Iop_Sub8
, True
, 1, delta
, "sbb" );
13837 case 0x1B: /* SBB Ev,Gv */
13838 delta
= dis_op2_E_G ( sorb
, True
, Iop_Sub8
, True
, sz
, delta
, "sbb" );
13841 case 0x22: /* AND Eb,Gb */
13842 delta
= dis_op2_E_G ( sorb
, False
, Iop_And8
, True
, 1, delta
, "and" );
13844 case 0x23: /* AND Ev,Gv */
13845 delta
= dis_op2_E_G ( sorb
, False
, Iop_And8
, True
, sz
, delta
, "and" );
13848 case 0x2A: /* SUB Eb,Gb */
13849 delta
= dis_op2_E_G ( sorb
, False
, Iop_Sub8
, True
, 1, delta
, "sub" );
13851 case 0x2B: /* SUB Ev,Gv */
13852 delta
= dis_op2_E_G ( sorb
, False
, Iop_Sub8
, True
, sz
, delta
, "sub" );
13855 case 0x32: /* XOR Eb,Gb */
13856 delta
= dis_op2_E_G ( sorb
, False
, Iop_Xor8
, True
, 1, delta
, "xor" );
13858 case 0x33: /* XOR Ev,Gv */
13859 delta
= dis_op2_E_G ( sorb
, False
, Iop_Xor8
, True
, sz
, delta
, "xor" );
13862 case 0x3A: /* CMP Eb,Gb */
13863 delta
= dis_op2_E_G ( sorb
, False
, Iop_Sub8
, False
, 1, delta
, "cmp" );
13865 case 0x3B: /* CMP Ev,Gv */
13866 delta
= dis_op2_E_G ( sorb
, False
, Iop_Sub8
, False
, sz
, delta
, "cmp" );
13869 case 0x84: /* TEST Eb,Gb */
13870 delta
= dis_op2_E_G ( sorb
, False
, Iop_And8
, False
, 1, delta
, "test" );
13872 case 0x85: /* TEST Ev,Gv */
13873 delta
= dis_op2_E_G ( sorb
, False
, Iop_And8
, False
, sz
, delta
, "test" );
13876 /* ------------------------ opl Gv, Ev ----------------- */
13878 case 0x00: /* ADD Gb,Eb */
13879 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13880 Iop_Add8
, True
, 1, delta
, "add" );
13882 case 0x01: /* ADD Gv,Ev */
13883 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13884 Iop_Add8
, True
, sz
, delta
, "add" );
13887 case 0x08: /* OR Gb,Eb */
13888 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13889 Iop_Or8
, True
, 1, delta
, "or" );
13891 case 0x09: /* OR Gv,Ev */
13892 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13893 Iop_Or8
, True
, sz
, delta
, "or" );
13896 case 0x10: /* ADC Gb,Eb */
13897 delta
= dis_op2_G_E ( sorb
, pfx_lock
, True
,
13898 Iop_Add8
, True
, 1, delta
, "adc" );
13900 case 0x11: /* ADC Gv,Ev */
13901 delta
= dis_op2_G_E ( sorb
, pfx_lock
, True
,
13902 Iop_Add8
, True
, sz
, delta
, "adc" );
13905 case 0x18: /* SBB Gb,Eb */
13906 delta
= dis_op2_G_E ( sorb
, pfx_lock
, True
,
13907 Iop_Sub8
, True
, 1, delta
, "sbb" );
13909 case 0x19: /* SBB Gv,Ev */
13910 delta
= dis_op2_G_E ( sorb
, pfx_lock
, True
,
13911 Iop_Sub8
, True
, sz
, delta
, "sbb" );
13914 case 0x20: /* AND Gb,Eb */
13915 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13916 Iop_And8
, True
, 1, delta
, "and" );
13918 case 0x21: /* AND Gv,Ev */
13919 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13920 Iop_And8
, True
, sz
, delta
, "and" );
13923 case 0x28: /* SUB Gb,Eb */
13924 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13925 Iop_Sub8
, True
, 1, delta
, "sub" );
13927 case 0x29: /* SUB Gv,Ev */
13928 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13929 Iop_Sub8
, True
, sz
, delta
, "sub" );
13932 case 0x30: /* XOR Gb,Eb */
13933 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13934 Iop_Xor8
, True
, 1, delta
, "xor" );
13936 case 0x31: /* XOR Gv,Ev */
13937 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13938 Iop_Xor8
, True
, sz
, delta
, "xor" );
13941 case 0x38: /* CMP Gb,Eb */
13942 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13943 Iop_Sub8
, False
, 1, delta
, "cmp" );
13945 case 0x39: /* CMP Gv,Ev */
13946 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13947 Iop_Sub8
, False
, sz
, delta
, "cmp" );
13950 /* ------------------------ POP ------------------------ */
13952 case 0x58: /* POP eAX */
13953 case 0x59: /* POP eCX */
13954 case 0x5A: /* POP eDX */
13955 case 0x5B: /* POP eBX */
13956 case 0x5D: /* POP eBP */
13957 case 0x5E: /* POP eSI */
13958 case 0x5F: /* POP eDI */
13959 case 0x5C: /* POP eSP */
13960 vassert(sz
== 2 || sz
== 4);
13961 t1
= newTemp(szToITy(sz
)); t2
= newTemp(Ity_I32
);
13962 assign(t2
, getIReg(4, R_ESP
));
13963 assign(t1
, loadLE(szToITy(sz
),mkexpr(t2
)));
13964 putIReg(4, R_ESP
, binop(Iop_Add32
, mkexpr(t2
), mkU32(sz
)));
13965 putIReg(sz
, opc
-0x58, mkexpr(t1
));
13966 DIP("pop%c %s\n", nameISize(sz
), nameIReg(sz
,opc
-0x58));
13969 case 0x9D: /* POPF */
13970 vassert(sz
== 2 || sz
== 4);
13971 t1
= newTemp(Ity_I32
); t2
= newTemp(Ity_I32
);
13972 assign(t2
, getIReg(4, R_ESP
));
13973 assign(t1
, widenUto32(loadLE(szToITy(sz
),mkexpr(t2
))));
13974 putIReg(4, R_ESP
, binop(Iop_Add32
, mkexpr(t2
), mkU32(sz
)));
13976 /* Generate IR to set %EFLAGS{O,S,Z,A,C,P,D,ID,AC} from the
13978 set_EFLAGS_from_value( t1
, True
/*emit_AC_emwarn*/,
13979 ((Addr32
)guest_EIP_bbstart
)+delta
);
13981 DIP("popf%c\n", nameISize(sz
));
13984 case 0x61: /* POPA */
13985 /* This is almost certainly wrong for sz==2. So ... */
13986 if (sz
!= 4) goto decode_failure
;
13988 /* t5 is the old %ESP value. */
13989 t5
= newTemp(Ity_I32
);
13990 assign( t5
, getIReg(4, R_ESP
) );
13992 /* Reload all the registers, except %esp. */
13993 putIReg(4,R_EAX
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t5
),mkU32(28)) ));
13994 putIReg(4,R_ECX
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t5
),mkU32(24)) ));
13995 putIReg(4,R_EDX
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t5
),mkU32(20)) ));
13996 putIReg(4,R_EBX
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t5
),mkU32(16)) ));
13997 /* ignore saved %ESP */
13998 putIReg(4,R_EBP
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t5
),mkU32( 8)) ));
13999 putIReg(4,R_ESI
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t5
),mkU32( 4)) ));
14000 putIReg(4,R_EDI
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t5
),mkU32( 0)) ));
14002 /* and move %ESP back up */
14003 putIReg( 4, R_ESP
, binop(Iop_Add32
, mkexpr(t5
), mkU32(8*4)) );
14005 DIP("popa%c\n", nameISize(sz
));
14008 case 0x8F: /* POPL/POPW m32 */
14010 UChar rm
= getIByte(delta
);
14012 /* make sure this instruction is correct POP */
14013 if (epartIsReg(rm
) || gregOfRM(rm
) != 0)
14014 goto decode_failure
;
14015 /* and has correct size */
14016 if (sz
!= 4 && sz
!= 2)
14017 goto decode_failure
;
14020 t1
= newTemp(Ity_I32
); /* stack address */
14021 t3
= newTemp(ty
); /* data */
14022 /* set t1 to ESP: t1 = ESP */
14023 assign( t1
, getIReg(4, R_ESP
) );
14024 /* load M[ESP] to virtual register t3: t3 = M[t1] */
14025 assign( t3
, loadLE(ty
, mkexpr(t1
)) );
14027 /* increase ESP; must be done before the STORE. Intel manual says:
14028 If the ESP register is used as a base register for addressing
14029 a destination operand in memory, the POP instruction computes
14030 the effective address of the operand after it increments the
14033 putIReg(4, R_ESP
, binop(Iop_Add32
, mkexpr(t1
), mkU32(sz
)) );
14035 /* resolve MODR/M */
14036 addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
14037 storeLE( mkexpr(addr
), mkexpr(t3
) );
14039 DIP("pop%c %s\n", sz
==2 ? 'w' : 'l', dis_buf
);
14045 case 0x1F: /* POP %DS */
14046 dis_pop_segreg( R_DS
, sz
); break;
14047 case 0x07: /* POP %ES */
14048 dis_pop_segreg( R_ES
, sz
); break;
14049 case 0x17: /* POP %SS */
14050 dis_pop_segreg( R_SS
, sz
); break;
14052 /* ------------------------ PUSH ----------------------- */
14054 case 0x50: /* PUSH eAX */
14055 case 0x51: /* PUSH eCX */
14056 case 0x52: /* PUSH eDX */
14057 case 0x53: /* PUSH eBX */
14058 case 0x55: /* PUSH eBP */
14059 case 0x56: /* PUSH eSI */
14060 case 0x57: /* PUSH eDI */
14061 case 0x54: /* PUSH eSP */
14062 /* This is the Right Way, in that the value to be pushed is
14063 established before %esp is changed, so that pushl %esp
14064 correctly pushes the old value. */
14065 vassert(sz
== 2 || sz
== 4);
14066 ty
= sz
==2 ? Ity_I16
: Ity_I32
;
14067 t1
= newTemp(ty
); t2
= newTemp(Ity_I32
);
14068 assign(t1
, getIReg(sz
, opc
-0x50));
14069 assign(t2
, binop(Iop_Sub32
, getIReg(4, R_ESP
), mkU32(sz
)));
14070 putIReg(4, R_ESP
, mkexpr(t2
) );
14071 storeLE(mkexpr(t2
),mkexpr(t1
));
14072 DIP("push%c %s\n", nameISize(sz
), nameIReg(sz
,opc
-0x50));
14076 case 0x68: /* PUSH Iv */
14077 d32
= getUDisp(sz
,delta
); delta
+= sz
;
14079 case 0x6A: /* PUSH Ib, sign-extended to sz */
14080 d32
= getSDisp8(delta
); delta
+= 1;
14084 t1
= newTemp(Ity_I32
); t2
= newTemp(ty
);
14085 assign( t1
, binop(Iop_Sub32
,getIReg(4,R_ESP
),mkU32(sz
)) );
14086 putIReg(4, R_ESP
, mkexpr(t1
) );
14087 /* stop mkU16 asserting if d32 is a negative 16-bit number
14091 storeLE( mkexpr(t1
), mkU(ty
,d32
) );
14092 DIP("push%c $0x%x\n", nameISize(sz
), d32
);
14095 case 0x9C: /* PUSHF */ {
14096 vassert(sz
== 2 || sz
== 4);
14098 t1
= newTemp(Ity_I32
);
14099 assign( t1
, binop(Iop_Sub32
,getIReg(4,R_ESP
),mkU32(sz
)) );
14100 putIReg(4, R_ESP
, mkexpr(t1
) );
14102 /* Calculate OSZACP, and patch in fixed fields as per
14104 - bit 1 is always 1
14105 - bit 9 is Interrupt Enable (should always be 1 in user mode?)
14107 t2
= newTemp(Ity_I32
);
14108 assign( t2
, binop(Iop_Or32
,
14109 mk_x86g_calculate_eflags_all(),
14110 mkU32( (1<<1)|(1<<9) ) ));
14112 /* Patch in the D flag. This can simply be a copy of bit 10 of
14113 baseBlock[OFFB_DFLAG]. */
14114 t3
= newTemp(Ity_I32
);
14115 assign( t3
, binop(Iop_Or32
,
14118 IRExpr_Get(OFFB_DFLAG
,Ity_I32
),
14122 /* And patch in the ID flag. */
14123 t4
= newTemp(Ity_I32
);
14124 assign( t4
, binop(Iop_Or32
,
14127 binop(Iop_Shl32
, IRExpr_Get(OFFB_IDFLAG
,Ity_I32
),
14132 /* And patch in the AC flag. */
14133 t5
= newTemp(Ity_I32
);
14134 assign( t5
, binop(Iop_Or32
,
14137 binop(Iop_Shl32
, IRExpr_Get(OFFB_ACFLAG
,Ity_I32
),
14142 /* if sz==2, the stored value needs to be narrowed. */
14144 storeLE( mkexpr(t1
), unop(Iop_32to16
,mkexpr(t5
)) );
14146 storeLE( mkexpr(t1
), mkexpr(t5
) );
14148 DIP("pushf%c\n", nameISize(sz
));
14152 case 0x60: /* PUSHA */
14153 /* This is almost certainly wrong for sz==2. So ... */
14154 if (sz
!= 4) goto decode_failure
;
14156 /* This is the Right Way, in that the value to be pushed is
14157 established before %esp is changed, so that pusha
14158 correctly pushes the old %esp value. New value of %esp is
14159 pushed at start. */
14160 /* t0 is the %ESP value we're going to push. */
14161 t0
= newTemp(Ity_I32
);
14162 assign( t0
, getIReg(4, R_ESP
) );
14164 /* t5 will be the new %ESP value. */
14165 t5
= newTemp(Ity_I32
);
14166 assign( t5
, binop(Iop_Sub32
, mkexpr(t0
), mkU32(8*4)) );
14168 /* Update guest state before prodding memory. */
14169 putIReg(4, R_ESP
, mkexpr(t5
));
14171 /* Dump all the registers. */
14172 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32(28)), getIReg(4,R_EAX
) );
14173 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32(24)), getIReg(4,R_ECX
) );
14174 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32(20)), getIReg(4,R_EDX
) );
14175 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32(16)), getIReg(4,R_EBX
) );
14176 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32(12)), mkexpr(t0
) /*esp*/);
14177 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32( 8)), getIReg(4,R_EBP
) );
14178 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32( 4)), getIReg(4,R_ESI
) );
14179 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32( 0)), getIReg(4,R_EDI
) );
14181 DIP("pusha%c\n", nameISize(sz
));
14184 case 0x0E: /* PUSH %CS */
14185 dis_push_segreg( R_CS
, sz
); break;
14186 case 0x1E: /* PUSH %DS */
14187 dis_push_segreg( R_DS
, sz
); break;
14188 case 0x06: /* PUSH %ES */
14189 dis_push_segreg( R_ES
, sz
); break;
14190 case 0x16: /* PUSH %SS */
14191 dis_push_segreg( R_SS
, sz
); break;
14193 /* ------------------------ SCAS et al ----------------- */
14195 case 0xA4: /* MOVS, no REP prefix */
14198 goto decode_failure
; /* else dis_string_op asserts */
14199 dis_string_op( dis_MOVS
, ( opc
== 0xA4 ? 1 : sz
), "movs", sorb
);
14202 case 0xA6: /* CMPSb, no REP prefix */
14205 goto decode_failure
; /* else dis_string_op asserts */
14206 dis_string_op( dis_CMPS
, ( opc
== 0xA6 ? 1 : sz
), "cmps", sorb
);
14209 case 0xAA: /* STOS, no REP prefix */
14212 goto decode_failure
; /* else dis_string_op asserts */
14213 dis_string_op( dis_STOS
, ( opc
== 0xAA ? 1 : sz
), "stos", sorb
);
14216 case 0xAC: /* LODS, no REP prefix */
14219 goto decode_failure
; /* else dis_string_op asserts */
14220 dis_string_op( dis_LODS
, ( opc
== 0xAC ? 1 : sz
), "lods", sorb
);
14223 case 0xAE: /* SCAS, no REP prefix */
14226 goto decode_failure
; /* else dis_string_op asserts */
14227 dis_string_op( dis_SCAS
, ( opc
== 0xAE ? 1 : sz
), "scas", sorb
);
14231 case 0xFC: /* CLD */
14232 stmt( IRStmt_Put( OFFB_DFLAG
, mkU32(1)) );
14236 case 0xFD: /* STD */
14237 stmt( IRStmt_Put( OFFB_DFLAG
, mkU32(0xFFFFFFFF)) );
14241 case 0xF8: /* CLC */
14242 case 0xF9: /* STC */
14243 case 0xF5: /* CMC */
14244 t0
= newTemp(Ity_I32
);
14245 t1
= newTemp(Ity_I32
);
14246 assign( t0
, mk_x86g_calculate_eflags_all() );
14249 assign( t1
, binop(Iop_And32
, mkexpr(t0
),
14250 mkU32(~X86G_CC_MASK_C
)));
14254 assign( t1
, binop(Iop_Or32
, mkexpr(t0
),
14255 mkU32(X86G_CC_MASK_C
)));
14259 assign( t1
, binop(Iop_Xor32
, mkexpr(t0
),
14260 mkU32(X86G_CC_MASK_C
)));
14264 vpanic("disInstr(x86)(clc/stc/cmc)");
14266 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
14267 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
14268 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(t1
) ));
14269 /* Set NDEP even though it isn't used. This makes redundant-PUT
14270 elimination of previous stores to this field work better. */
14271 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
14274 case 0xD6: /* SALC */
14275 t0
= newTemp(Ity_I32
);
14276 t1
= newTemp(Ity_I32
);
14277 assign( t0
, binop(Iop_And32
,
14278 mk_x86g_calculate_eflags_c(),
14280 assign( t1
, binop(Iop_Sar32
,
14281 binop(Iop_Shl32
, mkexpr(t0
), mkU8(31)),
14283 putIReg(1, R_EAX
, unop(Iop_32to8
, mkexpr(t1
)) );
14287 /* REPNE prefix insn */
14289 Addr32 eip_orig
= guest_EIP_bbstart
+ delta_start
;
14290 if (sorb
!= 0) goto decode_failure
;
14291 abyte
= getIByte(delta
); delta
++;
14293 if (abyte
== 0x66) { sz
= 2; abyte
= getIByte(delta
); delta
++; }
14296 /* According to the Intel manual, "repne movs" should never occur, but
14297 * in practice it has happened, so allow for it here... */
14298 case 0xA4: sz
= 1; /* REPNE MOVS<sz> fallthrough */
14300 dis_REP_op ( &dres
, X86CondNZ
, dis_MOVS
, sz
, eip_orig
,
14301 guest_EIP_bbstart
+delta
, "repne movs" );
14304 case 0xA6: sz
= 1; /* REPNE CMP<sz> fallthrough */
14306 dis_REP_op ( &dres
, X86CondNZ
, dis_CMPS
, sz
, eip_orig
,
14307 guest_EIP_bbstart
+delta
, "repne cmps" );
14310 case 0xAA: sz
= 1; /* REPNE STOS<sz> fallthrough */
14312 dis_REP_op ( &dres
, X86CondNZ
, dis_STOS
, sz
, eip_orig
,
14313 guest_EIP_bbstart
+delta
, "repne stos" );
14316 case 0xAE: sz
= 1; /* REPNE SCAS<sz> fallthrough */
14318 dis_REP_op ( &dres
, X86CondNZ
, dis_SCAS
, sz
, eip_orig
,
14319 guest_EIP_bbstart
+delta
, "repne scas" );
14323 goto decode_failure
;
14328 /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE,
14329 for the rest, it means REP) */
14331 Addr32 eip_orig
= guest_EIP_bbstart
+ delta_start
;
14332 abyte
= getIByte(delta
); delta
++;
14334 if (abyte
== 0x66) { sz
= 2; abyte
= getIByte(delta
); delta
++; }
14336 if (sorb
!= 0 && abyte
!= 0x0F) goto decode_failure
;
14340 switch (getIByte(delta
)) {
14341 /* On older CPUs, TZCNT behaves the same as BSF. */
14342 case 0xBC: /* REP BSF Gv,Ev */
14343 delta
= dis_bs_E_G ( sorb
, sz
, delta
+ 1, True
);
14345 /* On older CPUs, LZCNT behaves the same as BSR. */
14346 case 0xBD: /* REP BSR Gv,Ev */
14347 delta
= dis_bs_E_G ( sorb
, sz
, delta
+ 1, False
);
14350 goto decode_failure
;
14354 case 0xA4: sz
= 1; /* REP MOVS<sz> fallthrough */
14356 dis_REP_op ( &dres
, X86CondAlways
, dis_MOVS
, sz
, eip_orig
,
14357 guest_EIP_bbstart
+delta
, "rep movs" );
14360 case 0xA6: sz
= 1; /* REPE CMP<sz> fallthrough */
14362 dis_REP_op ( &dres
, X86CondZ
, dis_CMPS
, sz
, eip_orig
,
14363 guest_EIP_bbstart
+delta
, "repe cmps" );
14366 case 0xAA: sz
= 1; /* REP STOS<sz> fallthrough */
14368 dis_REP_op ( &dres
, X86CondAlways
, dis_STOS
, sz
, eip_orig
,
14369 guest_EIP_bbstart
+delta
, "rep stos" );
14372 case 0xAC: sz
= 1; /* REP LODS<sz> fallthrough */
14374 dis_REP_op ( &dres
, X86CondAlways
, dis_LODS
, sz
, eip_orig
,
14375 guest_EIP_bbstart
+delta
, "rep lods" );
14378 case 0xAE: sz
= 1; /* REPE SCAS<sz> fallthrough */
14380 dis_REP_op ( &dres
, X86CondZ
, dis_SCAS
, sz
, eip_orig
,
14381 guest_EIP_bbstart
+delta
, "repe scas" );
14384 case 0x90: /* REP NOP (PAUSE) */
14385 /* a hint to the P4 re spin-wait loop */
14386 DIP("rep nop (P4 pause)\n");
14387 /* "observe" the hint. The Vex client needs to be careful not
14388 to cause very long delays as a result, though. */
14389 jmp_lit(&dres
, Ijk_Yield
, ((Addr32
)guest_EIP_bbstart
)+delta
);
14390 vassert(dres
.whatNext
== Dis_StopHere
);
14393 case 0xC3: /* REP RET -- same as normal ret? */
14399 goto decode_failure
;
14404 /* ------------------------ XCHG ----------------------- */
14406 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
14407 prefix; hence it must be translated with an IRCAS (at least, the
14408 memory variant). */
14409 case 0x86: /* XCHG Gb,Eb */
14411 /* Fall through ... */
14412 case 0x87: /* XCHG Gv,Ev */
14413 modrm
= getIByte(delta
);
14415 t1
= newTemp(ty
); t2
= newTemp(ty
);
14416 if (epartIsReg(modrm
)) {
14417 assign(t1
, getIReg(sz
, eregOfRM(modrm
)));
14418 assign(t2
, getIReg(sz
, gregOfRM(modrm
)));
14419 putIReg(sz
, gregOfRM(modrm
), mkexpr(t1
));
14420 putIReg(sz
, eregOfRM(modrm
), mkexpr(t2
));
14422 DIP("xchg%c %s, %s\n",
14423 nameISize(sz
), nameIReg(sz
,gregOfRM(modrm
)),
14424 nameIReg(sz
,eregOfRM(modrm
)));
14426 *expect_CAS
= True
;
14427 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
14428 assign( t1
, loadLE(ty
,mkexpr(addr
)) );
14429 assign( t2
, getIReg(sz
,gregOfRM(modrm
)) );
14430 casLE( mkexpr(addr
),
14431 mkexpr(t1
), mkexpr(t2
), guest_EIP_curr_instr
);
14432 putIReg( sz
, gregOfRM(modrm
), mkexpr(t1
) );
14434 DIP("xchg%c %s, %s\n", nameISize(sz
),
14435 nameIReg(sz
,gregOfRM(modrm
)), dis_buf
);
14439 case 0x90: /* XCHG eAX,eAX */
14442 case 0x91: /* XCHG eAX,eCX */
14443 case 0x92: /* XCHG eAX,eDX */
14444 case 0x93: /* XCHG eAX,eBX */
14445 case 0x94: /* XCHG eAX,eSP */
14446 case 0x95: /* XCHG eAX,eBP */
14447 case 0x96: /* XCHG eAX,eSI */
14448 case 0x97: /* XCHG eAX,eDI */
14449 codegen_xchg_eAX_Reg ( sz
, opc
- 0x90 );
14452 /* ------------------------ XLAT ----------------------- */
14454 case 0xD7: /* XLAT */
14455 if (sz
!= 4) goto decode_failure
; /* sz == 2 is also allowed (0x66) */
14464 unop(Iop_8Uto32
, getIReg(1, R_EAX
/*AL*/))))));
14466 DIP("xlat%c [ebx]\n", nameISize(sz
));
14469 /* ------------------------ IN / OUT ----------------------- */
14471 case 0xE4: /* IN imm8, AL */
14473 t1
= newTemp(Ity_I32
);
14474 abyte
= getIByte(delta
); delta
++;
14475 assign(t1
, mkU32( abyte
& 0xFF ));
14476 DIP("in%c $%d,%s\n", nameISize(sz
), abyte
, nameIReg(sz
,R_EAX
));
14478 case 0xE5: /* IN imm8, eAX */
14479 vassert(sz
== 2 || sz
== 4);
14480 t1
= newTemp(Ity_I32
);
14481 abyte
= getIByte(delta
); delta
++;
14482 assign(t1
, mkU32( abyte
& 0xFF ));
14483 DIP("in%c $%d,%s\n", nameISize(sz
), abyte
, nameIReg(sz
,R_EAX
));
14485 case 0xEC: /* IN %DX, AL */
14487 t1
= newTemp(Ity_I32
);
14488 assign(t1
, unop(Iop_16Uto32
, getIReg(2, R_EDX
)));
14489 DIP("in%c %s,%s\n", nameISize(sz
), nameIReg(2,R_EDX
),
14490 nameIReg(sz
,R_EAX
));
14492 case 0xED: /* IN %DX, eAX */
14493 vassert(sz
== 2 || sz
== 4);
14494 t1
= newTemp(Ity_I32
);
14495 assign(t1
, unop(Iop_16Uto32
, getIReg(2, R_EDX
)));
14496 DIP("in%c %s,%s\n", nameISize(sz
), nameIReg(2,R_EDX
),
14497 nameIReg(sz
,R_EAX
));
14500 /* At this point, sz indicates the width, and t1 is a 32-bit
14501 value giving port number. */
14503 vassert(sz
== 1 || sz
== 2 || sz
== 4);
14505 t2
= newTemp(Ity_I32
);
14506 d
= unsafeIRDirty_1_N(
14509 "x86g_dirtyhelper_IN",
14510 &x86g_dirtyhelper_IN
,
14511 mkIRExprVec_2( mkexpr(t1
), mkU32(sz
) )
14513 /* do the call, dumping the result in t2. */
14514 stmt( IRStmt_Dirty(d
) );
14515 putIReg(sz
, R_EAX
, narrowTo( ty
, mkexpr(t2
) ) );
14519 case 0xE6: /* OUT AL, imm8 */
14521 t1
= newTemp(Ity_I32
);
14522 abyte
= getIByte(delta
); delta
++;
14523 assign( t1
, mkU32( abyte
& 0xFF ) );
14524 DIP("out%c %s,$%d\n", nameISize(sz
), nameIReg(sz
,R_EAX
), abyte
);
14526 case 0xE7: /* OUT eAX, imm8 */
14527 vassert(sz
== 2 || sz
== 4);
14528 t1
= newTemp(Ity_I32
);
14529 abyte
= getIByte(delta
); delta
++;
14530 assign( t1
, mkU32( abyte
& 0xFF ) );
14531 DIP("out%c %s,$%d\n", nameISize(sz
), nameIReg(sz
,R_EAX
), abyte
);
14533 case 0xEE: /* OUT AL, %DX */
14535 t1
= newTemp(Ity_I32
);
14536 assign( t1
, unop(Iop_16Uto32
, getIReg(2, R_EDX
)) );
14537 DIP("out%c %s,%s\n", nameISize(sz
), nameIReg(sz
,R_EAX
),
14538 nameIReg(2,R_EDX
));
14540 case 0xEF: /* OUT eAX, %DX */
14541 vassert(sz
== 2 || sz
== 4);
14542 t1
= newTemp(Ity_I32
);
14543 assign( t1
, unop(Iop_16Uto32
, getIReg(2, R_EDX
)) );
14544 DIP("out%c %s,%s\n", nameISize(sz
), nameIReg(sz
,R_EAX
),
14545 nameIReg(2,R_EDX
));
14548 /* At this point, sz indicates the width, and t1 is a 32-bit
14549 value giving port number. */
14551 vassert(sz
== 1 || sz
== 2 || sz
== 4);
14553 d
= unsafeIRDirty_0_N(
14555 "x86g_dirtyhelper_OUT",
14556 &x86g_dirtyhelper_OUT
,
14557 mkIRExprVec_3( mkexpr(t1
),
14558 widenUto32( getIReg(sz
, R_EAX
) ),
14561 stmt( IRStmt_Dirty(d
) );
14565 /* ------------------------ (Grp1 extensions) ---------- */
14567 case 0x82: /* Grp1 Ib,Eb too. Apparently this is the same as
14568 case 0x80, but only in 32-bit mode. */
14570 case 0x80: /* Grp1 Ib,Eb */
14571 modrm
= getIByte(delta
);
14572 am_sz
= lengthAMode(delta
);
14575 d32
= getUChar(delta
+ am_sz
);
14576 delta
= dis_Grp1 ( sorb
, pfx_lock
, delta
, modrm
, am_sz
, d_sz
, sz
, d32
);
14579 case 0x81: /* Grp1 Iv,Ev */
14580 modrm
= getIByte(delta
);
14581 am_sz
= lengthAMode(delta
);
14583 d32
= getUDisp(d_sz
, delta
+ am_sz
);
14584 delta
= dis_Grp1 ( sorb
, pfx_lock
, delta
, modrm
, am_sz
, d_sz
, sz
, d32
);
14587 case 0x83: /* Grp1 Ib,Ev */
14588 modrm
= getIByte(delta
);
14589 am_sz
= lengthAMode(delta
);
14591 d32
= getSDisp8(delta
+ am_sz
);
14592 delta
= dis_Grp1 ( sorb
, pfx_lock
, delta
, modrm
, am_sz
, d_sz
, sz
, d32
);
14595 /* ------------------------ (Grp2 extensions) ---------- */
14597 case 0xC0: { /* Grp2 Ib,Eb */
14598 Bool decode_OK
= True
;
14599 modrm
= getIByte(delta
);
14600 am_sz
= lengthAMode(delta
);
14602 d32
= getUChar(delta
+ am_sz
);
14604 delta
= dis_Grp2 ( sorb
, delta
, modrm
, am_sz
, d_sz
, sz
,
14605 mkU8(d32
& 0xFF), NULL
, &decode_OK
);
14607 goto decode_failure
;
14610 case 0xC1: { /* Grp2 Ib,Ev */
14611 Bool decode_OK
= True
;
14612 modrm
= getIByte(delta
);
14613 am_sz
= lengthAMode(delta
);
14615 d32
= getUChar(delta
+ am_sz
);
14616 delta
= dis_Grp2 ( sorb
, delta
, modrm
, am_sz
, d_sz
, sz
,
14617 mkU8(d32
& 0xFF), NULL
, &decode_OK
);
14619 goto decode_failure
;
14622 case 0xD0: { /* Grp2 1,Eb */
14623 Bool decode_OK
= True
;
14624 modrm
= getIByte(delta
);
14625 am_sz
= lengthAMode(delta
);
14629 delta
= dis_Grp2 ( sorb
, delta
, modrm
, am_sz
, d_sz
, sz
,
14630 mkU8(d32
), NULL
, &decode_OK
);
14632 goto decode_failure
;
14635 case 0xD1: { /* Grp2 1,Ev */
14636 Bool decode_OK
= True
;
14637 modrm
= getUChar(delta
);
14638 am_sz
= lengthAMode(delta
);
14641 delta
= dis_Grp2 ( sorb
, delta
, modrm
, am_sz
, d_sz
, sz
,
14642 mkU8(d32
), NULL
, &decode_OK
);
14644 goto decode_failure
;
14647 case 0xD2: { /* Grp2 CL,Eb */
14648 Bool decode_OK
= True
;
14649 modrm
= getUChar(delta
);
14650 am_sz
= lengthAMode(delta
);
14653 delta
= dis_Grp2 ( sorb
, delta
, modrm
, am_sz
, d_sz
, sz
,
14654 getIReg(1,R_ECX
), "%cl", &decode_OK
);
14656 goto decode_failure
;
14659 case 0xD3: { /* Grp2 CL,Ev */
14660 Bool decode_OK
= True
;
14661 modrm
= getIByte(delta
);
14662 am_sz
= lengthAMode(delta
);
14664 delta
= dis_Grp2 ( sorb
, delta
, modrm
, am_sz
, d_sz
, sz
,
14665 getIReg(1,R_ECX
), "%cl", &decode_OK
);
14667 goto decode_failure
;
14671 /* ------------------------ (Grp3 extensions) ---------- */
14673 case 0xF6: { /* Grp3 Eb */
14674 Bool decode_OK
= True
;
14675 delta
= dis_Grp3 ( sorb
, pfx_lock
, 1, delta
, &decode_OK
);
14677 goto decode_failure
;
14680 case 0xF7: { /* Grp3 Ev */
14681 Bool decode_OK
= True
;
14682 delta
= dis_Grp3 ( sorb
, pfx_lock
, sz
, delta
, &decode_OK
);
14684 goto decode_failure
;
14688 /* ------------------------ (Grp4 extensions) ---------- */
14690 case 0xFE: { /* Grp4 Eb */
14691 Bool decode_OK
= True
;
14692 delta
= dis_Grp4 ( sorb
, pfx_lock
, delta
, &decode_OK
);
14694 goto decode_failure
;
14698 /* ------------------------ (Grp5 extensions) ---------- */
14700 case 0xFF: { /* Grp5 Ev */
14701 Bool decode_OK
= True
;
14702 delta
= dis_Grp5 ( sorb
, pfx_lock
, sz
, delta
, &dres
, &decode_OK
);
14704 goto decode_failure
;
14708 /* ------------------------ Escapes to 2-byte opcodes -- */
14711 opc
= getIByte(delta
); delta
++;
14714 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */
14716 case 0xBA: { /* Grp8 Ib,Ev */
14717 Bool decode_OK
= False
;
14718 modrm
= getUChar(delta
);
14719 am_sz
= lengthAMode(delta
);
14720 d32
= getSDisp8(delta
+ am_sz
);
14721 delta
= dis_Grp8_Imm ( sorb
, pfx_lock
, delta
, modrm
,
14722 am_sz
, sz
, d32
, &decode_OK
);
14724 goto decode_failure
;
14728 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */
14730 case 0xBC: /* BSF Gv,Ev */
14731 delta
= dis_bs_E_G ( sorb
, sz
, delta
, True
);
14733 case 0xBD: /* BSR Gv,Ev */
14734 delta
= dis_bs_E_G ( sorb
, sz
, delta
, False
);
14737 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */
14739 case 0xC8: /* BSWAP %eax */
14746 case 0xCF: /* BSWAP %edi */
14747 /* AFAICS from the Intel docs, this only exists at size 4. */
14748 if (sz
!= 4) goto decode_failure
;
14750 t1
= newTemp(Ity_I32
);
14751 assign( t1
, getIReg(4, opc
-0xC8) );
14752 t2
= math_BSWAP(t1
, Ity_I32
);
14754 putIReg(4, opc
-0xC8, mkexpr(t2
));
14755 DIP("bswapl %s\n", nameIReg(4, opc
-0xC8));
14758 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
14760 case 0xA3: /* BT Gv,Ev */
14761 delta
= dis_bt_G_E ( vbi
, sorb
, pfx_lock
, sz
, delta
, BtOpNone
);
14763 case 0xB3: /* BTR Gv,Ev */
14764 delta
= dis_bt_G_E ( vbi
, sorb
, pfx_lock
, sz
, delta
, BtOpReset
);
14766 case 0xAB: /* BTS Gv,Ev */
14767 delta
= dis_bt_G_E ( vbi
, sorb
, pfx_lock
, sz
, delta
, BtOpSet
);
14769 case 0xBB: /* BTC Gv,Ev */
14770 delta
= dis_bt_G_E ( vbi
, sorb
, pfx_lock
, sz
, delta
, BtOpComp
);
14773 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
14777 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
14778 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
14779 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
14780 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
14781 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
14782 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
14783 case 0x48: /* CMOVSb (cmov negative) */
14784 case 0x49: /* CMOVSb (cmov not negative) */
14785 case 0x4A: /* CMOVP (cmov parity even) */
14786 case 0x4B: /* CMOVNP (cmov parity odd) */
14787 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
14788 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
14789 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
14790 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
14791 delta
= dis_cmov_E_G(sorb
, sz
, (X86Condcode
)(opc
- 0x40), delta
);
14794 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
14796 case 0xB0: /* CMPXCHG Gb,Eb */
14797 delta
= dis_cmpxchg_G_E ( sorb
, pfx_lock
, 1, delta
);
14799 case 0xB1: /* CMPXCHG Gv,Ev */
14800 delta
= dis_cmpxchg_G_E ( sorb
, pfx_lock
, sz
, delta
);
14803 case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */
14804 IRTemp expdHi
= newTemp(Ity_I32
);
14805 IRTemp expdLo
= newTemp(Ity_I32
);
14806 IRTemp dataHi
= newTemp(Ity_I32
);
14807 IRTemp dataLo
= newTemp(Ity_I32
);
14808 IRTemp oldHi
= newTemp(Ity_I32
);
14809 IRTemp oldLo
= newTemp(Ity_I32
);
14810 IRTemp flags_old
= newTemp(Ity_I32
);
14811 IRTemp flags_new
= newTemp(Ity_I32
);
14812 IRTemp success
= newTemp(Ity_I1
);
14814 /* Translate this using a DCAS, even if there is no LOCK
14815 prefix. Life is too short to bother with generating two
14816 different translations for the with/without-LOCK-prefix
14818 *expect_CAS
= True
;
14820 /* Decode, and generate address. */
14821 if (sz
!= 4) goto decode_failure
;
14822 modrm
= getIByte(delta
);
14823 if (epartIsReg(modrm
)) goto decode_failure
;
14824 if (gregOfRM(modrm
) != 1) goto decode_failure
;
14825 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
14828 /* Get the expected and new values. */
14829 assign( expdHi
, getIReg(4,R_EDX
) );
14830 assign( expdLo
, getIReg(4,R_EAX
) );
14831 assign( dataHi
, getIReg(4,R_ECX
) );
14832 assign( dataLo
, getIReg(4,R_EBX
) );
14836 mkIRCAS( oldHi
, oldLo
,
14837 Iend_LE
, mkexpr(addr
),
14838 mkexpr(expdHi
), mkexpr(expdLo
),
14839 mkexpr(dataHi
), mkexpr(dataLo
)
14842 /* success when oldHi:oldLo == expdHi:expdLo */
14844 binop(Iop_CasCmpEQ32
,
14846 binop(Iop_Xor32
, mkexpr(oldHi
), mkexpr(expdHi
)),
14847 binop(Iop_Xor32
, mkexpr(oldLo
), mkexpr(expdLo
))
14852 /* If the DCAS is successful, that is to say oldHi:oldLo ==
14853 expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX,
14854 which is where they came from originally. Both the actual
14855 contents of these two regs, and any shadow values, are
14856 unchanged. If the DCAS fails then we're putting into
14857 EDX:EAX the value seen in memory. */
14859 IRExpr_ITE( mkexpr(success
),
14860 mkexpr(expdHi
), mkexpr(oldHi
)
14863 IRExpr_ITE( mkexpr(success
),
14864 mkexpr(expdLo
), mkexpr(oldLo
)
14867 /* Copy the success bit into the Z flag and leave the others
14869 assign( flags_old
, widenUto32(mk_x86g_calculate_eflags_all()));
14873 binop(Iop_And32
, mkexpr(flags_old
),
14874 mkU32(~X86G_CC_MASK_Z
)),
14877 unop(Iop_1Uto32
, mkexpr(success
)), mkU32(1)),
14878 mkU8(X86G_CC_SHIFT_Z
)) ));
14880 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
14881 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(flags_new
) ));
14882 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
14883 /* Set NDEP even though it isn't used. This makes
14884 redundant-PUT elimination of previous stores to this field
14886 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
14888 /* Sheesh. Aren't you glad it was me and not you that had to
14889 write and validate all this grunge? */
14891 DIP("cmpxchg8b %s\n", dis_buf
);
14895 /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
14897 case 0xA2: { /* CPUID */
14898 /* Uses dirty helper:
14899 void dirtyhelper_CPUID_sse[012] ( VexGuestX86State* )
14900 declared to mod eax, wr ebx, ecx, edx
14903 void* fAddr
= NULL
;
14904 const HChar
* fName
= NULL
;
14905 if (archinfo
->hwcaps
& VEX_HWCAPS_X86_SSE3
) {
14906 fName
= "x86g_dirtyhelper_CPUID_sse3";
14907 fAddr
= &x86g_dirtyhelper_CPUID_sse3
;
14910 if (archinfo
->hwcaps
& VEX_HWCAPS_X86_SSE2
) {
14911 fName
= "x86g_dirtyhelper_CPUID_sse2";
14912 fAddr
= &x86g_dirtyhelper_CPUID_sse2
;
14915 if (archinfo
->hwcaps
& VEX_HWCAPS_X86_SSE1
) {
14916 fName
= "x86g_dirtyhelper_CPUID_sse1";
14917 fAddr
= &x86g_dirtyhelper_CPUID_sse1
;
14920 if (archinfo
->hwcaps
& VEX_HWCAPS_X86_MMXEXT
) {
14921 fName
= "x86g_dirtyhelper_CPUID_mmxext";
14922 fAddr
= &x86g_dirtyhelper_CPUID_mmxext
;
14925 if (archinfo
->hwcaps
== 0/*no SSE*/) {
14926 fName
= "x86g_dirtyhelper_CPUID_sse0";
14927 fAddr
= &x86g_dirtyhelper_CPUID_sse0
;
14929 vpanic("disInstr(x86)(cpuid)");
14931 vassert(fName
); vassert(fAddr
);
14932 d
= unsafeIRDirty_0_N ( 0/*regparms*/,
14933 fName
, fAddr
, mkIRExprVec_1(IRExpr_GSPTR()) );
14934 /* declare guest state effects */
14936 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
14937 d
->fxState
[0].fx
= Ifx_Modify
;
14938 d
->fxState
[0].offset
= OFFB_EAX
;
14939 d
->fxState
[0].size
= 4;
14940 d
->fxState
[1].fx
= Ifx_Write
;
14941 d
->fxState
[1].offset
= OFFB_EBX
;
14942 d
->fxState
[1].size
= 4;
14943 d
->fxState
[2].fx
= Ifx_Modify
;
14944 d
->fxState
[2].offset
= OFFB_ECX
;
14945 d
->fxState
[2].size
= 4;
14946 d
->fxState
[3].fx
= Ifx_Write
;
14947 d
->fxState
[3].offset
= OFFB_EDX
;
14948 d
->fxState
[3].size
= 4;
14949 /* execute the dirty call, side-effecting guest state */
14950 stmt( IRStmt_Dirty(d
) );
14951 /* CPUID is a serialising insn. So, just in case someone is
14952 using it as a memory fence ... */
14953 stmt( IRStmt_MBE(Imbe_Fence
) );
14958 //-- if (!VG_(cpu_has_feature)(VG_X86_FEAT_CPUID))
14959 //-- goto decode_failure;
14961 //-- t1 = newTemp(cb);
14962 //-- t2 = newTemp(cb);
14963 //-- t3 = newTemp(cb);
14964 //-- t4 = newTemp(cb);
14965 //-- uInstr0(cb, CALLM_S, 0);
14967 //-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t1);
14968 //-- uInstr1(cb, PUSH, 4, TempReg, t1);
14970 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2);
14971 //-- uLiteral(cb, 0);
14972 //-- uInstr1(cb, PUSH, 4, TempReg, t2);
14974 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t3);
14975 //-- uLiteral(cb, 0);
14976 //-- uInstr1(cb, PUSH, 4, TempReg, t3);
14978 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4);
14979 //-- uLiteral(cb, 0);
14980 //-- uInstr1(cb, PUSH, 4, TempReg, t4);
14982 //-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CPUID));
14983 //-- uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty);
14985 //-- uInstr1(cb, POP, 4, TempReg, t4);
14986 //-- uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_EDX);
14988 //-- uInstr1(cb, POP, 4, TempReg, t3);
14989 //-- uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ECX);
14991 //-- uInstr1(cb, POP, 4, TempReg, t2);
14992 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBX);
14994 //-- uInstr1(cb, POP, 4, TempReg, t1);
14995 //-- uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX);
14997 //-- uInstr0(cb, CALLM_E, 0);
14998 //-- DIP("cpuid\n");
15001 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */
15003 case 0xB6: /* MOVZXb Eb,Gv */
15004 if (sz
!= 2 && sz
!= 4)
15005 goto decode_failure
;
15006 delta
= dis_movx_E_G ( sorb
, delta
, 1, sz
, False
);
15009 case 0xB7: /* MOVZXw Ew,Gv */
15011 goto decode_failure
;
15012 delta
= dis_movx_E_G ( sorb
, delta
, 2, 4, False
);
15015 case 0xBE: /* MOVSXb Eb,Gv */
15016 if (sz
!= 2 && sz
!= 4)
15017 goto decode_failure
;
15018 delta
= dis_movx_E_G ( sorb
, delta
, 1, sz
, True
);
15021 case 0xBF: /* MOVSXw Ew,Gv */
15022 if (sz
!= 4 && /* accept movsww, sigh, see #250799 */sz
!= 2)
15023 goto decode_failure
;
15024 delta
= dis_movx_E_G ( sorb
, delta
, 2, sz
, True
);
15027 //-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
15029 //-- case 0xC3: /* MOVNTI Gv,Ev */
15030 //-- vg_assert(sz == 4);
15031 //-- modrm = getUChar(eip);
15032 //-- vg_assert(!epartIsReg(modrm));
15033 //-- t1 = newTemp(cb);
15034 //-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
15035 //-- pair = disAMode ( cb, sorb, eip, dis_buf );
15036 //-- t2 = LOW24(pair);
15037 //-- eip += HI8(pair);
15038 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
15039 //-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
15042 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
15044 case 0xAF: /* IMUL Ev, Gv */
15045 delta
= dis_mul_E_G ( sorb
, sz
, delta
);
15048 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */
15051 modrm
= getUChar(delta
);
15052 if (epartIsReg(modrm
)) goto decode_failure
;
15053 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
15055 DIP("nop%c %s\n", nameISize(sz
), dis_buf
);
15058 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
15061 case 0x82: /* JBb/JNAEb (jump below) */
15062 case 0x83: /* JNBb/JAEb (jump not below) */
15063 case 0x84: /* JZb/JEb (jump zero) */
15064 case 0x85: /* JNZb/JNEb (jump not zero) */
15065 case 0x86: /* JBEb/JNAb (jump below or equal) */
15066 case 0x87: /* JNBEb/JAb (jump not below or equal) */
15067 case 0x88: /* JSb (jump negative) */
15068 case 0x89: /* JSb (jump not negative) */
15069 case 0x8A: /* JP (jump parity even) */
15070 case 0x8B: /* JNP/JPO (jump parity odd) */
15071 case 0x8C: /* JLb/JNGEb (jump less) */
15072 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
15073 case 0x8E: /* JLEb/JNGb (jump less or equal) */
15074 case 0x8F: /* JGb/JNLEb (jump greater) */
15076 const HChar
* comment
= "";
15077 jmpDelta
= (Int
)getUDisp32(delta
);
15078 d32
= (((Addr32
)guest_EIP_bbstart
)+delta
+4) + jmpDelta
;
15081 && vex_control
.guest_chase_cond
15082 && (Addr32
)d32
!= (Addr32
)guest_EIP_bbstart
15084 && resteerOkFn( callback_opaque
, (Addr32
)d32
) ) {
15085 /* Speculation: assume this backward branch is taken. So
15086 we need to emit a side-exit to the insn following this
15087 one, on the negation of the condition, and continue at
15088 the branch target address (d32). If we wind up back at
15089 the first instruction of the trace, just stop; it's
15090 better to let the IR loop unroller handle that case.*/
15092 mk_x86g_calculate_condition((X86Condcode
)
15093 (1 ^ (opc
- 0x80))),
15095 IRConst_U32(guest_EIP_bbstart
+delta
),
15097 dres
.whatNext
= Dis_ResteerC
;
15098 dres
.continueAt
= (Addr32
)d32
;
15099 comment
= "(assumed taken)";
15103 && vex_control
.guest_chase_cond
15104 && (Addr32
)d32
!= (Addr32
)guest_EIP_bbstart
15106 && resteerOkFn( callback_opaque
,
15107 (Addr32
)(guest_EIP_bbstart
+delta
)) ) {
15108 /* Speculation: assume this forward branch is not taken.
15109 So we need to emit a side-exit to d32 (the dest) and
15110 continue disassembling at the insn immediately
15111 following this one. */
15113 mk_x86g_calculate_condition((X86Condcode
)(opc
- 0x80)),
15117 dres
.whatNext
= Dis_ResteerC
;
15118 dres
.continueAt
= guest_EIP_bbstart
+ delta
;
15119 comment
= "(assumed not taken)";
15122 /* Conservative default translation - end the block at
15124 jcc_01( &dres
, (X86Condcode
)(opc
- 0x80),
15125 (Addr32
)(guest_EIP_bbstart
+delta
), d32
);
15126 vassert(dres
.whatNext
== Dis_StopHere
);
15128 DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc
- 0x80), d32
, comment
);
15132 /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
15133 case 0x31: { /* RDTSC */
15134 IRTemp val
= newTemp(Ity_I64
);
15135 IRExpr
** args
= mkIRExprVec_0();
15136 IRDirty
* d
= unsafeIRDirty_1_N (
15139 "x86g_dirtyhelper_RDTSC",
15140 &x86g_dirtyhelper_RDTSC
,
15143 /* execute the dirty call, dumping the result in val. */
15144 stmt( IRStmt_Dirty(d
) );
15145 putIReg(4, R_EDX
, unop(Iop_64HIto32
, mkexpr(val
)));
15146 putIReg(4, R_EAX
, unop(Iop_64to32
, mkexpr(val
)));
15151 /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */
15153 case 0xA1: /* POP %FS */
15154 dis_pop_segreg( R_FS
, sz
); break;
15155 case 0xA9: /* POP %GS */
15156 dis_pop_segreg( R_GS
, sz
); break;
15158 case 0xA0: /* PUSH %FS */
15159 dis_push_segreg( R_FS
, sz
); break;
15160 case 0xA8: /* PUSH %GS */
15161 dis_push_segreg( R_GS
, sz
); break;
15163 /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
15166 case 0x92: /* set-Bb/set-NAEb (jump below) */
15167 case 0x93: /* set-NBb/set-AEb (jump not below) */
15168 case 0x94: /* set-Zb/set-Eb (jump zero) */
15169 case 0x95: /* set-NZb/set-NEb (jump not zero) */
15170 case 0x96: /* set-BEb/set-NAb (jump below or equal) */
15171 case 0x97: /* set-NBEb/set-Ab (jump not below or equal) */
15172 case 0x98: /* set-Sb (jump negative) */
15173 case 0x99: /* set-Sb (jump not negative) */
15174 case 0x9A: /* set-P (jump parity even) */
15175 case 0x9B: /* set-NP (jump parity odd) */
15176 case 0x9C: /* set-Lb/set-NGEb (jump less) */
15177 case 0x9D: /* set-GEb/set-NLb (jump greater or equal) */
15178 case 0x9E: /* set-LEb/set-NGb (jump less or equal) */
15179 case 0x9F: /* set-Gb/set-NLEb (jump greater) */
15180 t1
= newTemp(Ity_I8
);
15181 assign( t1
, unop(Iop_1Uto8
,mk_x86g_calculate_condition(opc
-0x90)) );
15182 modrm
= getIByte(delta
);
15183 if (epartIsReg(modrm
)) {
15185 putIReg(1, eregOfRM(modrm
), mkexpr(t1
));
15186 DIP("set%s %s\n", name_X86Condcode(opc
-0x90),
15187 nameIReg(1,eregOfRM(modrm
)));
15189 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
15191 storeLE( mkexpr(addr
), mkexpr(t1
) );
15192 DIP("set%s %s\n", name_X86Condcode(opc
-0x90), dis_buf
);
15196 /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
15198 case 0xA4: /* SHLDv imm8,Gv,Ev */
15199 modrm
= getIByte(delta
);
15200 d32
= delta
+ lengthAMode(delta
);
15201 vex_sprintf(dis_buf
, "$%d", getIByte(d32
));
15202 delta
= dis_SHLRD_Gv_Ev (
15203 sorb
, delta
, modrm
, sz
,
15204 mkU8(getIByte(d32
)), True
, /* literal */
15207 case 0xA5: /* SHLDv %cl,Gv,Ev */
15208 modrm
= getIByte(delta
);
15209 delta
= dis_SHLRD_Gv_Ev (
15210 sorb
, delta
, modrm
, sz
,
15211 getIReg(1,R_ECX
), False
, /* not literal */
15215 case 0xAC: /* SHRDv imm8,Gv,Ev */
15216 modrm
= getIByte(delta
);
15217 d32
= delta
+ lengthAMode(delta
);
15218 vex_sprintf(dis_buf
, "$%d", getIByte(d32
));
15219 delta
= dis_SHLRD_Gv_Ev (
15220 sorb
, delta
, modrm
, sz
,
15221 mkU8(getIByte(d32
)), True
, /* literal */
15224 case 0xAD: /* SHRDv %cl,Gv,Ev */
15225 modrm
= getIByte(delta
);
15226 delta
= dis_SHLRD_Gv_Ev (
15227 sorb
, delta
, modrm
, sz
,
15228 getIReg(1,R_ECX
), False
, /* not literal */
15232 /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */
15235 /* Simple implementation needing a long explaination.
15237 sysenter is a kind of syscall entry. The key thing here
15238 is that the return address is not known -- that is
15239 something that is beyond Vex's knowledge. So this IR
15240 forces a return to the scheduler, which can do what it
15241 likes to simulate the systenter, but it MUST set this
15242 thread's guest_EIP field with the continuation address
15243 before resuming execution. If that doesn't happen, the
15244 thread will jump to address zero, which is probably
15248 /* Note where we are, so we can back up the guest to this
15249 point if the syscall needs to be restarted. */
15250 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL
,
15251 mkU32(guest_EIP_curr_instr
) ) );
15252 jmp_lit(&dres
, Ijk_Sys_sysenter
, 0/*bogus next EIP value*/);
15253 vassert(dres
.whatNext
== Dis_StopHere
);
15257 /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */
15259 case 0xC0: { /* XADD Gb,Eb */
15261 delta
= dis_xadd_G_E ( sorb
, pfx_lock
, 1, delta
, &decodeOK
);
15262 if (!decodeOK
) goto decode_failure
;
15265 case 0xC1: { /* XADD Gv,Ev */
15267 delta
= dis_xadd_G_E ( sorb
, pfx_lock
, sz
, delta
, &decodeOK
);
15268 if (!decodeOK
) goto decode_failure
;
15272 /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */
15276 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
15278 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
15279 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
15280 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
15281 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
15285 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
15288 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
15291 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
15295 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
15298 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
15301 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
15303 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
15304 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
15306 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
15310 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
15314 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
15316 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
15317 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
15318 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
15322 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
15326 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
15328 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
15329 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
15330 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
15331 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
15333 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
15337 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
15341 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
15344 Int delta0
= delta
-1;
15345 Bool decode_OK
= False
;
15347 /* If sz==2 this is SSE, and we assume sse idec has
15348 already spotted those cases by now. */
15350 goto decode_failure
;
15352 delta
= dis_MMX ( &decode_OK
, sorb
, sz
, delta
-1 );
15355 goto decode_failure
;
15360 case 0x0E: /* FEMMS */
15361 case 0x77: /* EMMS */
15363 goto decode_failure
;
15364 do_EMMS_preamble();
15368 /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
15369 case 0x01: /* 0F 01 /0 -- SGDT */
15370 /* 0F 01 /1 -- SIDT */
15372 /* This is really revolting, but ... since each processor
15373 (core) only has one IDT and one GDT, just let the guest
15374 see it (pass-through semantics). I can't see any way to
15375 construct a faked-up value, so don't bother to try. */
15376 modrm
= getUChar(delta
);
15377 if (epartIsReg(modrm
)) goto decode_failure
;
15378 if (gregOfRM(modrm
) != 0 && gregOfRM(modrm
) != 1)
15379 goto decode_failure
;
15380 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
15382 switch (gregOfRM(modrm
)) {
15383 case 0: DIP("sgdt %s\n", dis_buf
); break;
15384 case 1: DIP("sidt %s\n", dis_buf
); break;
15385 default: vassert(0); /*NOTREACHED*/
15388 IRDirty
* d
= unsafeIRDirty_0_N (
15390 "x86g_dirtyhelper_SxDT",
15391 &x86g_dirtyhelper_SxDT
,
15392 mkIRExprVec_2( mkexpr(addr
),
15393 mkU32(gregOfRM(modrm
)) )
15395 /* declare we're writing memory */
15396 d
->mFx
= Ifx_Write
;
15397 d
->mAddr
= mkexpr(addr
);
15399 stmt( IRStmt_Dirty(d
) );
15403 case 0x05: /* AMD's syscall */
15404 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL
,
15405 mkU32(guest_EIP_curr_instr
) ) );
15406 jmp_lit(&dres
, Ijk_Sys_syscall
, ((Addr32
)guest_EIP_bbstart
)+delta
);
15407 vassert(dres
.whatNext
== Dis_StopHere
);
15411 /* =-=-=-=-=-=-=-=-=-=- UD2 =-=-=-=-=-=-=-=-=-=-=-= */
15413 case 0x0B: /* UD2 */
15414 stmt( IRStmt_Put( OFFB_EIP
, mkU32(guest_EIP_curr_instr
) ) );
15415 jmp_lit(&dres
, Ijk_NoDecode
, guest_EIP_curr_instr
);
15416 vassert(dres
.whatNext
== Dis_StopHere
);
15420 /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */
15423 goto decode_failure
;
15424 } /* switch (opc) for the 2-byte opcodes */
15425 goto decode_success
;
15426 } /* case 0x0F: of primary opcode */
15428 /* ------------------------ ??? ------------------------ */
15432 /* All decode failures end up here. */
15434 vex_printf("vex x86->IR: unhandled instruction bytes: "
15435 "0x%x 0x%x 0x%x 0x%x\n",
15436 getIByte(delta_start
+0),
15437 getIByte(delta_start
+1),
15438 getIByte(delta_start
+2),
15439 getIByte(delta_start
+3));
15442 /* Tell the dispatcher that this insn cannot be decoded, and so has
15443 not been executed, and (is currently) the next to be executed.
15444 EIP should be up-to-date since it made so at the start of each
15445 insn, but nevertheless be paranoid and update it again right
15447 stmt( IRStmt_Put( OFFB_EIP
, mkU32(guest_EIP_curr_instr
) ) );
15448 jmp_lit(&dres
, Ijk_NoDecode
, guest_EIP_curr_instr
);
15449 vassert(dres
.whatNext
== Dis_StopHere
);
15451 /* We also need to say that a CAS is not expected now, regardless
15452 of what it might have been set to at the start of the function,
15453 since the IR that we've emitted just above (to synthesis a
15454 SIGILL) does not involve any CAS, and presumably no other IR has
15455 been emitted for this (non-decoded) insn. */
15456 *expect_CAS
= False
;
15459 } /* switch (opc) for the main (primary) opcode switch. */
15462 /* All decode successes end up here. */
15463 switch (dres
.whatNext
) {
15465 stmt( IRStmt_Put( OFFB_EIP
, mkU32(guest_EIP_bbstart
+ delta
) ) );
15469 stmt( IRStmt_Put( OFFB_EIP
, mkU32(dres
.continueAt
) ) );
15478 dres
.len
= delta
- delta_start
;
/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/
15490 /* Disassemble a single instruction into IR. The instruction
15491 is located in host memory at &guest_code[delta]. */
15493 DisResult
disInstr_X86 ( IRSB
* irsb_IN
,
15494 Bool (*resteerOkFn
) ( void*, Addr
),
15496 void* callback_opaque
,
15497 const UChar
* guest_code_IN
,
15500 VexArch guest_arch
,
15501 const VexArchInfo
* archinfo
,
15502 const VexAbiInfo
* abiinfo
,
15503 VexEndness host_endness_IN
,
15504 Bool sigill_diag_IN
)
15507 Bool expect_CAS
, has_CAS
;
15510 /* Set globals (see top of this file) */
15511 vassert(guest_arch
== VexArchX86
);
15512 guest_code
= guest_code_IN
;
15514 host_endness
= host_endness_IN
;
15515 guest_EIP_curr_instr
= (Addr32
)guest_IP
;
15516 guest_EIP_bbstart
= (Addr32
)toUInt(guest_IP
- delta
);
15518 x1
= irsb_IN
->stmts_used
;
15519 expect_CAS
= False
;
15520 dres
= disInstr_X86_WRK ( &expect_CAS
, resteerOkFn
,
15523 delta
, archinfo
, abiinfo
, sigill_diag_IN
);
15524 x2
= irsb_IN
->stmts_used
;
15527 /* See comment at the top of disInstr_X86_WRK for meaning of
15528 expect_CAS. Here, we (sanity-)check for the presence/absence of
15529 IRCAS as directed by the returned expect_CAS value. */
15531 for (i
= x1
; i
< x2
; i
++) {
15532 if (irsb_IN
->stmts
[i
]->tag
== Ist_CAS
)
15536 if (expect_CAS
!= has_CAS
) {
15537 /* inconsistency detected. re-disassemble the instruction so as
15538 to generate a useful error message; then assert. */
15539 vex_traceflags
|= VEX_TRACE_FE
;
15540 dres
= disInstr_X86_WRK ( &expect_CAS
, resteerOkFn
,
15543 delta
, archinfo
, abiinfo
, sigill_diag_IN
);
15544 for (i
= x1
; i
< x2
; i
++) {
15545 vex_printf("\t\t");
15546 ppIRStmt(irsb_IN
->stmts
[i
]);
15549 /* Failure of this assertion is serious and denotes a bug in
15551 vpanic("disInstr_X86: inconsistency in LOCK prefix handling");
/*--------------------------------------------------------------------*/
/*--- end                                         guest_x86_toIR.c ---*/
/*--------------------------------------------------------------------*/