2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_x86_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2017 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 /* Translates x86 code to IR. */
38 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
39 to ensure a 32-bit value is being written.
41 FUCOMI(P): what happens to A and S flags? Currently are forced
46 * all arithmetic done at 64 bits
48 * no FP exceptions, except for handling stack over/underflow
50 * FP rounding mode observed only for float->int conversions
51 and int->float conversions which could lose accuracy, and
52 for float-to-float rounding. For all other operations,
53 round-to-nearest is used, regardless.
55 * some of the FCOM cases could do with testing -- not convinced
56 that the args are the right way round.
58 * FSAVE does not re-initialise the FPU; it should do
60 * FINIT not only initialises the FPU environment, it also
61 zeroes all the FP registers. It should leave the registers
64 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
65 per Intel docs this bit has no meaning anyway. Since PUSHF is the
66 only way to observe eflags[1], a proper fix would be to make that
69 The state of %eflags.AC (alignment check, bit 18) is recorded by
70 the simulation (viz, if you set it with popf then a pushf produces
71 the value you set it to), but it is otherwise ignored. In
72 particular, setting it to 1 does NOT cause alignment checking to
73 happen. Programs that set it to 1 and then rely on the resulting
74 SIGBUSs to inform them of misaligned accesses will not work.
76 Implementation of sysenter is necessarily partial. sysenter is a
77 kind of system call entry. When doing a sysenter, the return
78 address is not known -- that is something that is beyond Vex's
79 knowledge. So the generated IR forces a return to the scheduler,
80      which can do what it likes to simulate the sysenter, but it MUST
81 set this thread's guest_EIP field with the continuation address
82 before resuming execution. If that doesn't happen, the thread will
83 jump to address zero, which is probably fatal.
85 This module uses global variables and so is not MT-safe (if that
86 should ever become relevant).
88 The delta values are 32-bit ints, not 64-bit ints. That means
89 this module may not work right if run on a 64-bit host. That should
90 be fixed properly, really -- if anyone ever wants to use Vex to
91 translate x86 code for execution on a 64-bit host.
93 casLE (implementation of lock-prefixed insns) and rep-prefixed
94 insns: the side-exit back to the start of the insn is done with
95 Ijk_Boring. This is quite wrong, it should be done with
96 Ijk_NoRedir, since otherwise the side exit, which is intended to
97 restart the instruction for whatever reason, could go somewhere
98 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
99 no-redir jumps performance critical, at least for rep-prefixed
100 instructions, since all iterations thereof would involve such a
101 jump. It's not such a big deal with casLE since the side exit is
102 only taken if the CAS fails, that is, the location is contended,
103 which is relatively unlikely.
105 XXXX: Nov 2009: handling of SWP on ARM suffers from the same
108 Note also, the test for CAS success vs failure is done using
109 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
110 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
111 shouldn't definedness-check these comparisons. See
112 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
113 background/rationale.
116 /* Performance holes:
118 - fcom ; fstsw %ax ; sahf
119 sahf does not update the O flag (sigh) and so O needs to
120 be computed. This is done expensively; it would be better
121 to have a calculate_eflags_o helper.
123 - emwarns; some FP codes can generate huge numbers of these
124 if the fpucw is changed in an inner loop. It would be
125 better for the guest state to have an emwarn-enable reg
126 which can be set zero or nonzero. If it is zero, emwarns
127 are not flagged, and instead control just flows all the
128 way through bbs as usual.
131 /* "Special" instructions.
133 This instruction decoder can decode three special instructions
134 which mean nothing natively (are no-ops as far as regs/mem are
135 concerned) but have meaning for supporting Valgrind. A special
136 instruction is flagged by the 12-byte preamble C1C703 C1C70D C1C71D
137 C1C713 (in the standard interpretation, that means: roll $3, %edi;
138 roll $13, %edi; roll $29, %edi; roll $19, %edi). Following that,
139 one of the following 3 are allowed (standard interpretation in
142 87DB (xchgl %ebx,%ebx) %EDX = client_request ( %EAX )
143 87C9 (xchgl %ecx,%ecx) %EAX = guest_NRADDR
144 87D2 (xchgl %edx,%edx) call-noredir *%EAX
145 87FF (xchgl %edi,%edi) IR injection
147 Any other bytes following the 12-byte preamble are illegal and
148 constitute a failure in instruction decoding. This all assumes
149 that the preamble will never occur except in specific code
150 fragments designed for Valgrind to catch.
152 No prefixes may precede a "Special" instruction.
155 /* LOCK prefixed instructions. These are translated using IR-level
156 CAS statements (IRCAS) and are believed to preserve atomicity, even
157 from the point of view of some other process racing against a
158 simulated one (presumably they communicate via a shared memory
161 Handlers which are aware of LOCK prefixes are:
162 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
163 dis_cmpxchg_G_E (cmpxchg)
164 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
168 dis_Grp8_Imm (bts, btc, btr)
169 dis_bt_G_E (bts, btc, btr)
174 #include "libvex_basictypes.h"
175 #include "libvex_ir.h"
177 #include "libvex_guest_x86.h"
179 #include "main_util.h"
180 #include "main_globals.h"
181 #include "guest_generic_bb_to_IR.h"
182 #include "guest_generic_x87.h"
183 #include "guest_x86_defs.h"
186 /*------------------------------------------------------------*/
188 /*------------------------------------------------------------*/
190 /* These are set at the start of the translation of an insn, right
191 down in disInstr_X86, so that we don't have to pass them around
192 endlessly. They are all constant during the translation of any
195 /* We need to know this to do sub-register accesses correctly. */
196 static VexEndness host_endness
;
198 /* Pointer to the guest code area (points to start of BB, not to the
199 insn being processed). */
200 static const UChar
* guest_code
;
202 /* The guest address corresponding to guest_code[0]. */
203 static Addr32 guest_EIP_bbstart
;
205 /* The guest address for the instruction currently being
207 static Addr32 guest_EIP_curr_instr
;
209 /* The IRSB* into which we're generating code. */
/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

/* Emit front-end trace output, but only when VEX_TRACE_FE is set. */
#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

/* As DIP, but renders into 'buf' via vex_sprintf instead of printing. */
#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)
/*------------------------------------------------------------*/
/*--- Offsets of various parts of the x86 guest state.     ---*/
/*------------------------------------------------------------*/

/* Integer registers. */
#define OFFB_EAX       offsetof(VexGuestX86State,guest_EAX)
#define OFFB_EBX       offsetof(VexGuestX86State,guest_EBX)
#define OFFB_ECX       offsetof(VexGuestX86State,guest_ECX)
#define OFFB_EDX       offsetof(VexGuestX86State,guest_EDX)
#define OFFB_ESP       offsetof(VexGuestX86State,guest_ESP)
#define OFFB_EBP       offsetof(VexGuestX86State,guest_EBP)
#define OFFB_ESI       offsetof(VexGuestX86State,guest_ESI)
#define OFFB_EDI       offsetof(VexGuestX86State,guest_EDI)

#define OFFB_EIP       offsetof(VexGuestX86State,guest_EIP)

/* Condition-code thunk fields. */
#define OFFB_CC_OP     offsetof(VexGuestX86State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestX86State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestX86State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestX86State,guest_CC_NDEP)

/* FPU and flag state. */
#define OFFB_FPREGS    offsetof(VexGuestX86State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestX86State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestX86State,guest_DFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestX86State,guest_IDFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestX86State,guest_ACFLAG)
#define OFFB_FTOP      offsetof(VexGuestX86State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestX86State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestX86State,guest_FPROUND)

/* Segment registers and descriptor tables. */
#define OFFB_CS        offsetof(VexGuestX86State,guest_CS)
#define OFFB_DS        offsetof(VexGuestX86State,guest_DS)
#define OFFB_ES        offsetof(VexGuestX86State,guest_ES)
#define OFFB_FS        offsetof(VexGuestX86State,guest_FS)
#define OFFB_GS        offsetof(VexGuestX86State,guest_GS)
#define OFFB_SS        offsetof(VexGuestX86State,guest_SS)
#define OFFB_LDT       offsetof(VexGuestX86State,guest_LDT)
#define OFFB_GDT       offsetof(VexGuestX86State,guest_GDT)

/* SSE state. */
#define OFFB_SSEROUND  offsetof(VexGuestX86State,guest_SSEROUND)
#define OFFB_XMM0      offsetof(VexGuestX86State,guest_XMM0)
#define OFFB_XMM1      offsetof(VexGuestX86State,guest_XMM1)
#define OFFB_XMM2      offsetof(VexGuestX86State,guest_XMM2)
#define OFFB_XMM3      offsetof(VexGuestX86State,guest_XMM3)
#define OFFB_XMM4      offsetof(VexGuestX86State,guest_XMM4)
#define OFFB_XMM5      offsetof(VexGuestX86State,guest_XMM5)
#define OFFB_XMM6      offsetof(VexGuestX86State,guest_XMM6)
#define OFFB_XMM7      offsetof(VexGuestX86State,guest_XMM7)

/* Miscellaneous simulation-support fields. */
#define OFFB_EMNOTE    offsetof(VexGuestX86State,guest_EMNOTE)

#define OFFB_CMSTART   offsetof(VexGuestX86State,guest_CMSTART)
#define OFFB_CMLEN     offsetof(VexGuestX86State,guest_CMLEN)
#define OFFB_NRADDR    offsetof(VexGuestX86State,guest_NRADDR)

#define OFFB_IP_AT_SYSCALL offsetof(VexGuestX86State,guest_IP_AT_SYSCALL)
/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- x86 insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* This is the Intel register encoding -- integer regs. */
#define R_EAX 0
#define R_ECX 1
#define R_EDX 2
#define R_EBX 3
#define R_ESP 4
#define R_EBP 5
#define R_ESI 6
#define R_EDI 7

/* 8-bit sub-register encodings: AL is the low byte of EAX, AH the
   second byte (regs 4..7 in 1-byte mode name the high bytes). */
#define R_AL (0+R_EAX)
#define R_AH (4+R_EAX)

/* This is the Intel register encoding -- segment regs. */
#define R_ES 0
#define R_CS 1
#define R_SS 2
#define R_DS 3
#define R_FS 4
#define R_GS 5
310 /* Add a statement to the list held by "irbb". */
311 static void stmt ( IRStmt
* st
)
313 addStmtToIRSB( irsb
, st
);
316 /* Generate a new temporary of the given type. */
317 static IRTemp
newTemp ( IRType ty
)
319 vassert(isPlausibleIRType(ty
));
320 return newIRTemp( irsb
->tyenv
, ty
);
323 /* Various simple conversions */
325 static UInt
extend_s_8to32( UInt x
)
327 return (UInt
)((Int
)(x
<< 24) >> 24);
330 static UInt
extend_s_16to32 ( UInt x
)
332 return (UInt
)((Int
)(x
<< 16) >> 16);
335 /* Fetch a byte from the guest insn stream. */
336 static UChar
getIByte ( Int delta
)
338 return guest_code
[delta
];
341 /* Extract the reg field from a modRM byte. */
342 static Int
gregOfRM ( UChar mod_reg_rm
)
344 return (Int
)( (mod_reg_rm
>> 3) & 7 );
347 /* Figure out whether the mod and rm parts of a modRM byte refer to a
348 register or memory. If so, the byte will have the form 11XXXYYY,
349 where YYY is the register number. */
350 static Bool
epartIsReg ( UChar mod_reg_rm
)
352 return toBool(0xC0 == (mod_reg_rm
& 0xC0));
355 /* ... and extract the register number ... */
356 static Int
eregOfRM ( UChar mod_reg_rm
)
358 return (Int
)(mod_reg_rm
& 0x7);
361 /* Get a 8/16/32-bit unsigned value out of the insn stream. */
363 static UChar
getUChar ( Int delta
)
365 UChar v
= guest_code
[delta
+0];
369 static UInt
getUDisp16 ( Int delta
)
371 UInt v
= guest_code
[delta
+1]; v
<<= 8;
372 v
|= guest_code
[delta
+0];
376 static UInt
getUDisp32 ( Int delta
)
378 UInt v
= guest_code
[delta
+3]; v
<<= 8;
379 v
|= guest_code
[delta
+2]; v
<<= 8;
380 v
|= guest_code
[delta
+1]; v
<<= 8;
381 v
|= guest_code
[delta
+0];
385 static UInt
getUDisp ( Int size
, Int delta
)
388 case 4: return getUDisp32(delta
);
389 case 2: return getUDisp16(delta
);
390 case 1: return (UInt
)getUChar(delta
);
391 default: vpanic("getUDisp(x86)");
393 return 0; /*notreached*/
397 /* Get a byte value out of the insn stream and sign-extend to 32
399 static UInt
getSDisp8 ( Int delta
)
401 return extend_s_8to32( (UInt
) (guest_code
[delta
]) );
404 static UInt
getSDisp16 ( Int delta0
)
406 const UChar
* eip
= &guest_code
[delta0
];
408 d
|= ((*eip
++) << 8);
409 return extend_s_16to32(d
);
412 static UInt
getSDisp ( Int size
, Int delta
)
415 case 4: return getUDisp32(delta
);
416 case 2: return getSDisp16(delta
);
417 case 1: return getSDisp8(delta
);
418 default: vpanic("getSDisp(x86)");
420 return 0; /*notreached*/
424 /*------------------------------------------------------------*/
425 /*--- Helpers for constructing IR. ---*/
426 /*------------------------------------------------------------*/
428 /* Create a 1/2/4 byte read of an x86 integer registers. For 16/8 bit
429 register references, we need to take the host endianness into
430 account. Supplied value is 0 .. 7 and in the Intel instruction
433 static IRType
szToITy ( Int n
)
436 case 1: return Ity_I8
;
437 case 2: return Ity_I16
;
438 case 4: return Ity_I32
;
439 default: vpanic("szToITy(x86)");
443 /* On a little-endian host, less significant bits of the guest
444 registers are at lower addresses. Therefore, if a reference to a
445 register low half has the safe guest state offset as a reference to
448 static Int
integerGuestRegOffset ( Int sz
, UInt archreg
)
450 vassert(archreg
< 8);
452 /* Correct for little-endian host only. */
453 vassert(host_endness
== VexEndnessLE
);
455 if (sz
== 4 || sz
== 2 || (sz
== 1 && archreg
< 4)) {
457 case R_EAX
: return OFFB_EAX
;
458 case R_EBX
: return OFFB_EBX
;
459 case R_ECX
: return OFFB_ECX
;
460 case R_EDX
: return OFFB_EDX
;
461 case R_ESI
: return OFFB_ESI
;
462 case R_EDI
: return OFFB_EDI
;
463 case R_ESP
: return OFFB_ESP
;
464 case R_EBP
: return OFFB_EBP
;
465 default: vpanic("integerGuestRegOffset(x86,le)(4,2)");
469 vassert(archreg
>= 4 && archreg
< 8 && sz
== 1);
471 case R_EAX
: return 1+ OFFB_EAX
;
472 case R_EBX
: return 1+ OFFB_EBX
;
473 case R_ECX
: return 1+ OFFB_ECX
;
474 case R_EDX
: return 1+ OFFB_EDX
;
475 default: vpanic("integerGuestRegOffset(x86,le)(1h)");
479 vpanic("integerGuestRegOffset(x86,le)");
482 static Int
segmentGuestRegOffset ( UInt sreg
)
485 case R_ES
: return OFFB_ES
;
486 case R_CS
: return OFFB_CS
;
487 case R_SS
: return OFFB_SS
;
488 case R_DS
: return OFFB_DS
;
489 case R_FS
: return OFFB_FS
;
490 case R_GS
: return OFFB_GS
;
491 default: vpanic("segmentGuestRegOffset(x86)");
495 static Int
xmmGuestRegOffset ( UInt xmmreg
)
498 case 0: return OFFB_XMM0
;
499 case 1: return OFFB_XMM1
;
500 case 2: return OFFB_XMM2
;
501 case 3: return OFFB_XMM3
;
502 case 4: return OFFB_XMM4
;
503 case 5: return OFFB_XMM5
;
504 case 6: return OFFB_XMM6
;
505 case 7: return OFFB_XMM7
;
506 default: vpanic("xmmGuestRegOffset");
510 /* Lanes of vector registers are always numbered from zero being the
511 least significant lane (rightmost in the register). */
513 static Int
xmmGuestRegLane16offset ( UInt xmmreg
, Int laneno
)
515 /* Correct for little-endian host only. */
516 vassert(host_endness
== VexEndnessLE
);
517 vassert(laneno
>= 0 && laneno
< 8);
518 return xmmGuestRegOffset( xmmreg
) + 2 * laneno
;
521 static Int
xmmGuestRegLane32offset ( UInt xmmreg
, Int laneno
)
523 /* Correct for little-endian host only. */
524 vassert(host_endness
== VexEndnessLE
);
525 vassert(laneno
>= 0 && laneno
< 4);
526 return xmmGuestRegOffset( xmmreg
) + 4 * laneno
;
529 static Int
xmmGuestRegLane64offset ( UInt xmmreg
, Int laneno
)
531 /* Correct for little-endian host only. */
532 vassert(host_endness
== VexEndnessLE
);
533 vassert(laneno
>= 0 && laneno
< 2);
534 return xmmGuestRegOffset( xmmreg
) + 8 * laneno
;
537 static IRExpr
* getIReg ( Int sz
, UInt archreg
)
539 vassert(sz
== 1 || sz
== 2 || sz
== 4);
540 vassert(archreg
< 8);
541 return IRExpr_Get( integerGuestRegOffset(sz
,archreg
),
545 /* Ditto, but write to a reg instead. */
546 static void putIReg ( Int sz
, UInt archreg
, IRExpr
* e
)
548 IRType ty
= typeOfIRExpr(irsb
->tyenv
, e
);
550 case 1: vassert(ty
== Ity_I8
); break;
551 case 2: vassert(ty
== Ity_I16
); break;
552 case 4: vassert(ty
== Ity_I32
); break;
553 default: vpanic("putIReg(x86)");
555 vassert(archreg
< 8);
556 stmt( IRStmt_Put(integerGuestRegOffset(sz
,archreg
), e
) );
559 static IRExpr
* getSReg ( UInt sreg
)
561 return IRExpr_Get( segmentGuestRegOffset(sreg
), Ity_I16
);
564 static void putSReg ( UInt sreg
, IRExpr
* e
)
566 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I16
);
567 stmt( IRStmt_Put( segmentGuestRegOffset(sreg
), e
) );
570 static IRExpr
* getXMMReg ( UInt xmmreg
)
572 return IRExpr_Get( xmmGuestRegOffset(xmmreg
), Ity_V128
);
575 static IRExpr
* getXMMRegLane64 ( UInt xmmreg
, Int laneno
)
577 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg
,laneno
), Ity_I64
);
580 static IRExpr
* getXMMRegLane64F ( UInt xmmreg
, Int laneno
)
582 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg
,laneno
), Ity_F64
);
585 static IRExpr
* getXMMRegLane32 ( UInt xmmreg
, Int laneno
)
587 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg
,laneno
), Ity_I32
);
590 static IRExpr
* getXMMRegLane32F ( UInt xmmreg
, Int laneno
)
592 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg
,laneno
), Ity_F32
);
595 static void putXMMReg ( UInt xmmreg
, IRExpr
* e
)
597 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_V128
);
598 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg
), e
) );
601 static void putXMMRegLane64 ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
603 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I64
);
604 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg
,laneno
), e
) );
607 static void putXMMRegLane64F ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
609 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_F64
);
610 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg
,laneno
), e
) );
613 static void putXMMRegLane32F ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
615 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_F32
);
616 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg
,laneno
), e
) );
619 static void putXMMRegLane32 ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
621 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I32
);
622 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg
,laneno
), e
) );
625 static void putXMMRegLane16 ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
627 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I16
);
628 stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg
,laneno
), e
) );
631 static void assign ( IRTemp dst
, IRExpr
* e
)
633 stmt( IRStmt_WrTmp(dst
, e
) );
636 static void storeLE ( IRExpr
* addr
, IRExpr
* data
)
638 stmt( IRStmt_Store(Iend_LE
, addr
, data
) );
641 static IRExpr
* unop ( IROp op
, IRExpr
* a
)
643 return IRExpr_Unop(op
, a
);
646 static IRExpr
* binop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
)
648 return IRExpr_Binop(op
, a1
, a2
);
651 static IRExpr
* triop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
, IRExpr
* a3
)
653 return IRExpr_Triop(op
, a1
, a2
, a3
);
656 static IRExpr
* mkexpr ( IRTemp tmp
)
658 return IRExpr_RdTmp(tmp
);
661 static IRExpr
* mkU8 ( UInt i
)
664 return IRExpr_Const(IRConst_U8( (UChar
)i
));
667 static IRExpr
* mkU16 ( UInt i
)
670 return IRExpr_Const(IRConst_U16( (UShort
)i
));
673 static IRExpr
* mkU32 ( UInt i
)
675 return IRExpr_Const(IRConst_U32(i
));
678 static IRExpr
* mkU64 ( ULong i
)
680 return IRExpr_Const(IRConst_U64(i
));
683 static IRExpr
* mkU ( IRType ty
, UInt i
)
685 if (ty
== Ity_I8
) return mkU8(i
);
686 if (ty
== Ity_I16
) return mkU16(i
);
687 if (ty
== Ity_I32
) return mkU32(i
);
688 /* If this panics, it usually means you passed a size (1,2,4)
689 value as the IRType, rather than a real IRType. */
693 static IRExpr
* mkV128 ( UShort mask
)
695 return IRExpr_Const(IRConst_V128(mask
));
698 static IRExpr
* loadLE ( IRType ty
, IRExpr
* addr
)
700 return IRExpr_Load(Iend_LE
, ty
, addr
);
703 static IROp
mkSizedOp ( IRType ty
, IROp op8
)
706 vassert(ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
);
707 vassert(op8
== Iop_Add8
|| op8
== Iop_Sub8
709 || op8
== Iop_Or8
|| op8
== Iop_And8
|| op8
== Iop_Xor8
710 || op8
== Iop_Shl8
|| op8
== Iop_Shr8
|| op8
== Iop_Sar8
711 || op8
== Iop_CmpEQ8
|| op8
== Iop_CmpNE8
712 || op8
== Iop_CasCmpNE8
713 || op8
== Iop_ExpCmpNE8
715 adj
= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1 : 2);
719 static IROp
mkWidenOp ( Int szSmall
, Int szBig
, Bool signd
)
721 if (szSmall
== 1 && szBig
== 4) {
722 return signd
? Iop_8Sto32
: Iop_8Uto32
;
724 if (szSmall
== 1 && szBig
== 2) {
725 return signd
? Iop_8Sto16
: Iop_8Uto16
;
727 if (szSmall
== 2 && szBig
== 4) {
728 return signd
? Iop_16Sto32
: Iop_16Uto32
;
730 vpanic("mkWidenOp(x86,guest)");
733 static IRExpr
* mkAnd1 ( IRExpr
* x
, IRExpr
* y
)
735 vassert(typeOfIRExpr(irsb
->tyenv
,x
) == Ity_I1
);
736 vassert(typeOfIRExpr(irsb
->tyenv
,y
) == Ity_I1
);
737 return unop(Iop_32to1
,
740 unop(Iop_1Uto32
,y
)));
743 /* Generate a compare-and-swap operation, operating on memory at
744 'addr'. The expected value is 'expVal' and the new value is
745 'newVal'. If the operation fails, then transfer control (with a
746 no-redir jump (XXX no -- see comment at top of this file)) to
747 'restart_point', which is presumably the address of the guest
748 instruction again -- retrying, essentially. */
749 static void casLE ( IRExpr
* addr
, IRExpr
* expVal
, IRExpr
* newVal
,
750 Addr32 restart_point
)
753 IRType tyE
= typeOfIRExpr(irsb
->tyenv
, expVal
);
754 IRType tyN
= typeOfIRExpr(irsb
->tyenv
, newVal
);
755 IRTemp oldTmp
= newTemp(tyE
);
756 IRTemp expTmp
= newTemp(tyE
);
758 vassert(tyE
== Ity_I32
|| tyE
== Ity_I16
|| tyE
== Ity_I8
);
759 assign(expTmp
, expVal
);
760 cas
= mkIRCAS( IRTemp_INVALID
, oldTmp
, Iend_LE
, addr
,
761 NULL
, mkexpr(expTmp
), NULL
, newVal
);
762 stmt( IRStmt_CAS(cas
) );
764 binop( mkSizedOp(tyE
,Iop_CasCmpNE8
),
765 mkexpr(oldTmp
), mkexpr(expTmp
) ),
766 Ijk_Boring
, /*Ijk_NoRedir*/
767 IRConst_U32( restart_point
),
773 /*------------------------------------------------------------*/
774 /*--- Helpers for %eflags. ---*/
775 /*------------------------------------------------------------*/
777 /* -------------- Evaluating the flags-thunk. -------------- */
779 /* Build IR to calculate all the eflags from stored
780 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
782 static IRExpr
* mk_x86g_calculate_eflags_all ( void )
785 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP
, Ity_I32
),
786 IRExpr_Get(OFFB_CC_DEP1
, Ity_I32
),
787 IRExpr_Get(OFFB_CC_DEP2
, Ity_I32
),
788 IRExpr_Get(OFFB_CC_NDEP
, Ity_I32
) );
793 "x86g_calculate_eflags_all", &x86g_calculate_eflags_all
,
796 /* Exclude OP and NDEP from definedness checking. We're only
797 interested in DEP1 and DEP2. */
798 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<3);
802 /* Build IR to calculate some particular condition from stored
803 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
805 static IRExpr
* mk_x86g_calculate_condition ( X86Condcode cond
)
808 = mkIRExprVec_5( mkU32(cond
),
809 IRExpr_Get(OFFB_CC_OP
, Ity_I32
),
810 IRExpr_Get(OFFB_CC_DEP1
, Ity_I32
),
811 IRExpr_Get(OFFB_CC_DEP2
, Ity_I32
),
812 IRExpr_Get(OFFB_CC_NDEP
, Ity_I32
) );
817 "x86g_calculate_condition", &x86g_calculate_condition
,
820 /* Exclude the requested condition, OP and NDEP from definedness
821 checking. We're only interested in DEP1 and DEP2. */
822 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<1) | (1<<4);
823 return unop(Iop_32to1
, call
);
826 /* Build IR to calculate just the carry flag from stored
827 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I32. */
828 static IRExpr
* mk_x86g_calculate_eflags_c ( void )
831 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP
, Ity_I32
),
832 IRExpr_Get(OFFB_CC_DEP1
, Ity_I32
),
833 IRExpr_Get(OFFB_CC_DEP2
, Ity_I32
),
834 IRExpr_Get(OFFB_CC_NDEP
, Ity_I32
) );
839 "x86g_calculate_eflags_c", &x86g_calculate_eflags_c
,
842 /* Exclude OP and NDEP from definedness checking. We're only
843 interested in DEP1 and DEP2. */
844 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<3);
849 /* -------------- Building the flags-thunk. -------------- */
851 /* The machinery in this section builds the flag-thunk following a
852 flag-setting operation. Hence the various setFlags_* functions.
855 static Bool
isAddSub ( IROp op8
)
857 return toBool(op8
== Iop_Add8
|| op8
== Iop_Sub8
);
860 static Bool
isLogic ( IROp op8
)
862 return toBool(op8
== Iop_And8
|| op8
== Iop_Or8
|| op8
== Iop_Xor8
);
865 /* U-widen 8/16/32 bit int expr to 32. */
866 static IRExpr
* widenUto32 ( IRExpr
* e
)
868 switch (typeOfIRExpr(irsb
->tyenv
,e
)) {
869 case Ity_I32
: return e
;
870 case Ity_I16
: return unop(Iop_16Uto32
,e
);
871 case Ity_I8
: return unop(Iop_8Uto32
,e
);
872 default: vpanic("widenUto32");
876 /* S-widen 8/16/32 bit int expr to 32. */
877 static IRExpr
* widenSto32 ( IRExpr
* e
)
879 switch (typeOfIRExpr(irsb
->tyenv
,e
)) {
880 case Ity_I32
: return e
;
881 case Ity_I16
: return unop(Iop_16Sto32
,e
);
882 case Ity_I8
: return unop(Iop_8Sto32
,e
);
883 default: vpanic("widenSto32");
887 /* Narrow 8/16/32 bit int expr to 8/16/32. Clearly only some
888 of these combinations make sense. */
889 static IRExpr
* narrowTo ( IRType dst_ty
, IRExpr
* e
)
891 IRType src_ty
= typeOfIRExpr(irsb
->tyenv
,e
);
892 if (src_ty
== dst_ty
)
894 if (src_ty
== Ity_I32
&& dst_ty
== Ity_I16
)
895 return unop(Iop_32to16
, e
);
896 if (src_ty
== Ity_I32
&& dst_ty
== Ity_I8
)
897 return unop(Iop_32to8
, e
);
899 vex_printf("\nsrc, dst tys are: ");
904 vpanic("narrowTo(x86)");
908 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
909 auto-sized up to the real op. */
912 void setFlags_DEP1_DEP2 ( IROp op8
, IRTemp dep1
, IRTemp dep2
, IRType ty
)
914 Int ccOp
= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1 : 2);
916 vassert(ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
);
919 case Iop_Add8
: ccOp
+= X86G_CC_OP_ADDB
; break;
920 case Iop_Sub8
: ccOp
+= X86G_CC_OP_SUBB
; break;
921 default: ppIROp(op8
);
922 vpanic("setFlags_DEP1_DEP2(x86)");
924 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(ccOp
)) );
925 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(dep1
))) );
926 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto32(mkexpr(dep2
))) );
927 /* Set NDEP even though it isn't used. This makes redundant-PUT
928 elimination of previous stores to this field work better. */
929 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
933 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
936 void setFlags_DEP1 ( IROp op8
, IRTemp dep1
, IRType ty
)
938 Int ccOp
= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1 : 2);
940 vassert(ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
);
945 case Iop_Xor8
: ccOp
+= X86G_CC_OP_LOGICB
; break;
946 default: ppIROp(op8
);
947 vpanic("setFlags_DEP1(x86)");
949 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(ccOp
)) );
950 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(dep1
))) );
951 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0)) );
952 /* Set NDEP even though it isn't used. This makes redundant-PUT
953 elimination of previous stores to this field work better. */
954 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
958 /* For shift operations, we put in the result and the undershifted
959 result. Except if the shift amount is zero, the thunk is left
962 static void setFlags_DEP1_DEP2_shift ( IROp op32
,
968 Int ccOp
= ty
==Ity_I8
? 2 : (ty
==Ity_I16
? 1 : 0);
970 vassert(ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
);
973 /* Both kinds of right shifts are handled by the same thunk
977 case Iop_Sar32
: ccOp
= X86G_CC_OP_SHRL
- ccOp
; break;
978 case Iop_Shl32
: ccOp
= X86G_CC_OP_SHLL
- ccOp
; break;
979 default: ppIROp(op32
);
980 vpanic("setFlags_DEP1_DEP2_shift(x86)");
983 /* guard :: Ity_I8. We need to convert it to I1. */
984 IRTemp guardB
= newTemp(Ity_I1
);
985 assign( guardB
, binop(Iop_CmpNE8
, mkexpr(guard
), mkU8(0)) );
987 /* DEP1 contains the result, DEP2 contains the undershifted value. */
988 stmt( IRStmt_Put( OFFB_CC_OP
,
989 IRExpr_ITE( mkexpr(guardB
),
991 IRExpr_Get(OFFB_CC_OP
,Ity_I32
) ) ));
992 stmt( IRStmt_Put( OFFB_CC_DEP1
,
993 IRExpr_ITE( mkexpr(guardB
),
994 widenUto32(mkexpr(res
)),
995 IRExpr_Get(OFFB_CC_DEP1
,Ity_I32
) ) ));
996 stmt( IRStmt_Put( OFFB_CC_DEP2
,
997 IRExpr_ITE( mkexpr(guardB
),
998 widenUto32(mkexpr(resUS
)),
999 IRExpr_Get(OFFB_CC_DEP2
,Ity_I32
) ) ));
1000 /* Set NDEP even though it isn't used. This makes redundant-PUT
1001 elimination of previous stores to this field work better. */
1002 stmt( IRStmt_Put( OFFB_CC_NDEP
,
1003 IRExpr_ITE( mkexpr(guardB
),
1005 IRExpr_Get(OFFB_CC_NDEP
,Ity_I32
) ) ));
1009 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1010 the former value of the carry flag, which unfortunately we have to
1013 static void setFlags_INC_DEC ( Bool inc
, IRTemp res
, IRType ty
)
1015 Int ccOp
= inc
? X86G_CC_OP_INCB
: X86G_CC_OP_DECB
;
1017 ccOp
+= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1 : 2);
1018 vassert(ty
== Ity_I8
|| ty
== Ity_I16
|| ty
== Ity_I32
);
1020 /* This has to come first, because calculating the C flag
1021 may require reading all four thunk fields. */
1022 stmt( IRStmt_Put( OFFB_CC_NDEP
, mk_x86g_calculate_eflags_c()) );
1023 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(ccOp
)) );
1024 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(res
))) );
1025 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0)) );
1029 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1033 void setFlags_MUL ( IRType ty
, IRTemp arg1
, IRTemp arg2
, UInt base_op
)
1037 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(base_op
+0) ) );
1040 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(base_op
+1) ) );
1043 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(base_op
+2) ) );
1046 vpanic("setFlags_MUL(x86)");
1048 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(arg1
)) ));
1049 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto32(mkexpr(arg2
)) ));
1050 /* Set NDEP even though it isn't used. This makes redundant-PUT
1051 elimination of previous stores to this field work better. */
1052 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
1056 /* -------------- Condition codes. -------------- */
1058 /* Condition codes, using the Intel encoding. */
1060 static const HChar
* name_X86Condcode ( X86Condcode cond
)
1063 case X86CondO
: return "o";
1064 case X86CondNO
: return "no";
1065 case X86CondB
: return "b";
1066 case X86CondNB
: return "nb";
1067 case X86CondZ
: return "z";
1068 case X86CondNZ
: return "nz";
1069 case X86CondBE
: return "be";
1070 case X86CondNBE
: return "nbe";
1071 case X86CondS
: return "s";
1072 case X86CondNS
: return "ns";
1073 case X86CondP
: return "p";
1074 case X86CondNP
: return "np";
1075 case X86CondL
: return "l";
1076 case X86CondNL
: return "nl";
1077 case X86CondLE
: return "le";
1078 case X86CondNLE
: return "nle";
1079 case X86CondAlways
: return "ALWAYS";
1080 default: vpanic("name_X86Condcode");
1085 X86Condcode
positiveIse_X86Condcode ( X86Condcode cond
,
1088 vassert(cond
>= X86CondO
&& cond
<= X86CondNLE
);
1093 *needInvert
= False
;
1099 /* -------------- Helpers for ADD/SUB with carry. -------------- */
1101 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1104 Optionally, generate a store for the 'tres' value. This can either
1105 be a normal store, or it can be a cas-with-possible-failure style
1108 if taddr is IRTemp_INVALID, then no store is generated.
1110 if taddr is not IRTemp_INVALID, then a store (using taddr as
1111 the address) is generated:
1113 if texpVal is IRTemp_INVALID then a normal store is
1114 generated, and restart_point must be zero (it is irrelevant).
1116 if texpVal is not IRTemp_INVALID then a cas-style store is
1117 generated. texpVal is the expected value, restart_point
1118 is the restart point if the store fails, and texpVal must
1119 have the same type as tres.
1121 static void helper_ADC ( Int sz
,
1122 IRTemp tres
, IRTemp ta1
, IRTemp ta2
,
1123 /* info about optional store: */
1124 IRTemp taddr
, IRTemp texpVal
, Addr32 restart_point
)
1127 IRType ty
= szToITy(sz
);
1128 IRTemp oldc
= newTemp(Ity_I32
);
1129 IRTemp oldcn
= newTemp(ty
);
1130 IROp plus
= mkSizedOp(ty
, Iop_Add8
);
1131 IROp
xor = mkSizedOp(ty
, Iop_Xor8
);
1133 vassert(typeOfIRTemp(irsb
->tyenv
, tres
) == ty
);
1134 vassert(sz
== 1 || sz
== 2 || sz
== 4);
1135 thunkOp
= sz
==4 ? X86G_CC_OP_ADCL
1136 : (sz
==2 ? X86G_CC_OP_ADCW
: X86G_CC_OP_ADCB
);
1138 /* oldc = old carry flag, 0 or 1 */
1139 assign( oldc
, binop(Iop_And32
,
1140 mk_x86g_calculate_eflags_c(),
1143 assign( oldcn
, narrowTo(ty
, mkexpr(oldc
)) );
1145 assign( tres
, binop(plus
,
1146 binop(plus
,mkexpr(ta1
),mkexpr(ta2
)),
1149 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1150 start of this function. */
1151 if (taddr
!= IRTemp_INVALID
) {
1152 if (texpVal
== IRTemp_INVALID
) {
1153 vassert(restart_point
== 0);
1154 storeLE( mkexpr(taddr
), mkexpr(tres
) );
1156 vassert(typeOfIRTemp(irsb
->tyenv
, texpVal
) == ty
);
1157 /* .. and hence 'texpVal' has the same type as 'tres'. */
1158 casLE( mkexpr(taddr
),
1159 mkexpr(texpVal
), mkexpr(tres
), restart_point
);
1163 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(thunkOp
) ) );
1164 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(ta1
)) ));
1165 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto32(binop(xor, mkexpr(ta2
),
1166 mkexpr(oldcn
)) )) );
1167 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkexpr(oldc
) ) );
1171 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
1172 appropriately. As with helper_ADC, possibly generate a store of
1173 the result -- see comments on helper_ADC for details.
1175 static void helper_SBB ( Int sz
,
1176 IRTemp tres
, IRTemp ta1
, IRTemp ta2
,
1177 /* info about optional store: */
1178 IRTemp taddr
, IRTemp texpVal
, Addr32 restart_point
)
1181 IRType ty
= szToITy(sz
);
1182 IRTemp oldc
= newTemp(Ity_I32
);
1183 IRTemp oldcn
= newTemp(ty
);
1184 IROp minus
= mkSizedOp(ty
, Iop_Sub8
);
1185 IROp
xor = mkSizedOp(ty
, Iop_Xor8
);
1187 vassert(typeOfIRTemp(irsb
->tyenv
, tres
) == ty
);
1188 vassert(sz
== 1 || sz
== 2 || sz
== 4);
1189 thunkOp
= sz
==4 ? X86G_CC_OP_SBBL
1190 : (sz
==2 ? X86G_CC_OP_SBBW
: X86G_CC_OP_SBBB
);
1192 /* oldc = old carry flag, 0 or 1 */
1193 assign( oldc
, binop(Iop_And32
,
1194 mk_x86g_calculate_eflags_c(),
1197 assign( oldcn
, narrowTo(ty
, mkexpr(oldc
)) );
1199 assign( tres
, binop(minus
,
1200 binop(minus
,mkexpr(ta1
),mkexpr(ta2
)),
1203 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
1204 start of this function. */
1205 if (taddr
!= IRTemp_INVALID
) {
1206 if (texpVal
== IRTemp_INVALID
) {
1207 vassert(restart_point
== 0);
1208 storeLE( mkexpr(taddr
), mkexpr(tres
) );
1210 vassert(typeOfIRTemp(irsb
->tyenv
, texpVal
) == ty
);
1211 /* .. and hence 'texpVal' has the same type as 'tres'. */
1212 casLE( mkexpr(taddr
),
1213 mkexpr(texpVal
), mkexpr(tres
), restart_point
);
1217 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(thunkOp
) ) );
1218 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto32(mkexpr(ta1
) )) );
1219 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto32(binop(xor, mkexpr(ta2
),
1220 mkexpr(oldcn
)) )) );
1221 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkexpr(oldc
) ) );
1225 /* -------------- Helpers for disassembly printing. -------------- */
1227 static const HChar
* nameGrp1 ( Int opc_aux
)
1229 static const HChar
* grp1_names
[8]
1230 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
1231 if (opc_aux
< 0 || opc_aux
> 7) vpanic("nameGrp1(x86)");
1232 return grp1_names
[opc_aux
];
1235 static const HChar
* nameGrp2 ( Int opc_aux
)
1237 static const HChar
* grp2_names
[8]
1238 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
1239 if (opc_aux
< 0 || opc_aux
> 7) vpanic("nameGrp2(x86)");
1240 return grp2_names
[opc_aux
];
1243 static const HChar
* nameGrp4 ( Int opc_aux
)
1245 static const HChar
* grp4_names
[8]
1246 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
1247 if (opc_aux
< 0 || opc_aux
> 1) vpanic("nameGrp4(x86)");
1248 return grp4_names
[opc_aux
];
1251 static const HChar
* nameGrp5 ( Int opc_aux
)
1253 static const HChar
* grp5_names
[8]
1254 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
1255 if (opc_aux
< 0 || opc_aux
> 6) vpanic("nameGrp5(x86)");
1256 return grp5_names
[opc_aux
];
1259 static const HChar
* nameGrp8 ( Int opc_aux
)
1261 static const HChar
* grp8_names
[8]
1262 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
1263 if (opc_aux
< 4 || opc_aux
> 7) vpanic("nameGrp8(x86)");
1264 return grp8_names
[opc_aux
];
1267 static const HChar
* nameIReg ( Int size
, Int reg
)
1269 static const HChar
* ireg32_names
[8]
1270 = { "%eax", "%ecx", "%edx", "%ebx",
1271 "%esp", "%ebp", "%esi", "%edi" };
1272 static const HChar
* ireg16_names
[8]
1273 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" };
1274 static const HChar
* ireg8_names
[8]
1275 = { "%al", "%cl", "%dl", "%bl",
1276 "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" };
1277 if (reg
< 0 || reg
> 7) goto bad
;
1279 case 4: return ireg32_names
[reg
];
1280 case 2: return ireg16_names
[reg
];
1281 case 1: return ireg8_names
[reg
];
1284 vpanic("nameIReg(X86)");
1285 return NULL
; /*notreached*/
1288 static const HChar
* nameSReg ( UInt sreg
)
1291 case R_ES
: return "%es";
1292 case R_CS
: return "%cs";
1293 case R_SS
: return "%ss";
1294 case R_DS
: return "%ds";
1295 case R_FS
: return "%fs";
1296 case R_GS
: return "%gs";
1297 default: vpanic("nameSReg(x86)");
1301 static const HChar
* nameMMXReg ( Int mmxreg
)
1303 static const HChar
* mmx_names
[8]
1304 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
1305 if (mmxreg
< 0 || mmxreg
> 7) vpanic("nameMMXReg(x86,guest)");
1306 return mmx_names
[mmxreg
];
1309 static const HChar
* nameXMMReg ( Int xmmreg
)
1311 static const HChar
* xmm_names
[8]
1312 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
1313 "%xmm4", "%xmm5", "%xmm6", "%xmm7" };
1314 if (xmmreg
< 0 || xmmreg
> 7) vpanic("name_of_xmm_reg");
1315 return xmm_names
[xmmreg
];
1318 static const HChar
* nameMMXGran ( Int gran
)
1325 default: vpanic("nameMMXGran(x86,guest)");
1329 static HChar
nameISize ( Int size
)
1335 default: vpanic("nameISize(x86)");
1340 /*------------------------------------------------------------*/
1341 /*--- JMP helpers ---*/
1342 /*------------------------------------------------------------*/
1344 static void jmp_lit( /*MOD*/DisResult
* dres
,
1345 IRJumpKind kind
, Addr32 d32
)
1347 vassert(dres
->whatNext
== Dis_Continue
);
1348 vassert(dres
->len
== 0);
1349 vassert(dres
->jk_StopHere
== Ijk_INVALID
);
1350 dres
->whatNext
= Dis_StopHere
;
1351 dres
->jk_StopHere
= kind
;
1352 stmt( IRStmt_Put( OFFB_EIP
, mkU32(d32
) ) );
1355 static void jmp_treg( /*MOD*/DisResult
* dres
,
1356 IRJumpKind kind
, IRTemp t
)
1358 vassert(dres
->whatNext
== Dis_Continue
);
1359 vassert(dres
->len
== 0);
1360 vassert(dres
->jk_StopHere
== Ijk_INVALID
);
1361 dres
->whatNext
= Dis_StopHere
;
1362 dres
->jk_StopHere
= kind
;
1363 stmt( IRStmt_Put( OFFB_EIP
, mkexpr(t
) ) );
1367 void jcc_01( /*MOD*/DisResult
* dres
,
1368 X86Condcode cond
, Addr32 d32_false
, Addr32 d32_true
)
1371 X86Condcode condPos
;
1372 vassert(dres
->whatNext
== Dis_Continue
);
1373 vassert(dres
->len
== 0);
1374 vassert(dres
->jk_StopHere
== Ijk_INVALID
);
1375 dres
->whatNext
= Dis_StopHere
;
1376 dres
->jk_StopHere
= Ijk_Boring
;
1377 condPos
= positiveIse_X86Condcode ( cond
, &invert
);
1379 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos
),
1381 IRConst_U32(d32_false
),
1383 stmt( IRStmt_Put( OFFB_EIP
, mkU32(d32_true
) ) );
1385 stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos
),
1387 IRConst_U32(d32_true
),
1389 stmt( IRStmt_Put( OFFB_EIP
, mkU32(d32_false
) ) );
1394 /*------------------------------------------------------------*/
1395 /*--- Disassembling addressing modes ---*/
1396 /*------------------------------------------------------------*/
1399 const HChar
* sorbTxt ( UChar sorb
)
1402 case 0: return ""; /* no override */
1403 case 0x3E: return "%ds";
1404 case 0x26: return "%es:";
1405 case 0x64: return "%fs:";
1406 case 0x65: return "%gs:";
1407 case 0x36: return "%ss:";
1408 default: vpanic("sorbTxt(x86,guest)");
1413 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
1414 linear address by adding any required segment override as indicated
1417 IRExpr
* handleSegOverride ( UChar sorb
, IRExpr
* virtual )
1421 IRTemp ldt_ptr
, gdt_ptr
, seg_selector
, r64
;
1424 /* the common case - no override */
1428 case 0x3E: sreg
= R_DS
; break;
1429 case 0x26: sreg
= R_ES
; break;
1430 case 0x64: sreg
= R_FS
; break;
1431 case 0x65: sreg
= R_GS
; break;
1432 case 0x36: sreg
= R_SS
; break;
1433 default: vpanic("handleSegOverride(x86,guest)");
1436 hWordTy
= sizeof(HWord
)==4 ? Ity_I32
: Ity_I64
;
1438 seg_selector
= newTemp(Ity_I32
);
1439 ldt_ptr
= newTemp(hWordTy
);
1440 gdt_ptr
= newTemp(hWordTy
);
1441 r64
= newTemp(Ity_I64
);
1443 assign( seg_selector
, unop(Iop_16Uto32
, getSReg(sreg
)) );
1444 assign( ldt_ptr
, IRExpr_Get( OFFB_LDT
, hWordTy
));
1445 assign( gdt_ptr
, IRExpr_Get( OFFB_GDT
, hWordTy
));
1448 Call this to do the translation and limit checks:
1449 ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
1450 UInt seg_selector, UInt virtual_addr )
1457 "x86g_use_seg_selector",
1458 &x86g_use_seg_selector
,
1459 mkIRExprVec_4( mkexpr(ldt_ptr
), mkexpr(gdt_ptr
),
1460 mkexpr(seg_selector
), virtual)
1464 /* If the high 32 of the result are non-zero, there was a
1465 failure in address translation. In which case, make a
1470 binop(Iop_CmpNE32
, unop(Iop_64HIto32
, mkexpr(r64
)), mkU32(0)),
1472 IRConst_U32( guest_EIP_curr_instr
),
1477 /* otherwise, here's the translated result. */
1478 return unop(Iop_64to32
, mkexpr(r64
));
1482 /* Generate IR to calculate an address indicated by a ModRM and
1483 following SIB bytes. The expression, and the number of bytes in
1484 the address mode, are returned. Note that this fn should not be
1485 called if the R/M part of the address denotes a register instead of
1486 memory. If print_codegen is true, text of the addressing mode is
1489 The computed address is stored in a new tempreg, and the
1490 identity of the tempreg is returned. */
1492 static IRTemp
disAMode_copy2tmp ( IRExpr
* addr32
)
1494 IRTemp tmp
= newTemp(Ity_I32
);
1495 assign( tmp
, addr32
);
1500 IRTemp
disAMode ( Int
* len
, UChar sorb
, Int delta
, HChar
* buf
)
1502 UChar mod_reg_rm
= getIByte(delta
);
1507 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
1508 jump table seems a bit excessive.
1510 mod_reg_rm
&= 0xC7; /* is now XX000YYY */
1511 mod_reg_rm
= toUChar(mod_reg_rm
| (mod_reg_rm
>> 3));
1512 /* is now XX0XXYYY */
1513 mod_reg_rm
&= 0x1F; /* is now 000XXYYY */
1514 switch (mod_reg_rm
) {
1516 /* (%eax) .. (%edi), not including (%esp) or (%ebp).
1519 case 0x00: case 0x01: case 0x02: case 0x03:
1520 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
1521 { UChar rm
= mod_reg_rm
;
1522 DIS(buf
, "%s(%s)", sorbTxt(sorb
), nameIReg(4,rm
));
1524 return disAMode_copy2tmp(
1525 handleSegOverride(sorb
, getIReg(4,rm
)));
1528 /* d8(%eax) ... d8(%edi), not including d8(%esp)
1529 --> GET %reg, t ; ADDL d8, t
1531 case 0x08: case 0x09: case 0x0A: case 0x0B:
1532 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
1533 { UChar rm
= toUChar(mod_reg_rm
& 7);
1534 UInt d
= getSDisp8(delta
);
1535 DIS(buf
, "%s%d(%s)", sorbTxt(sorb
), (Int
)d
, nameIReg(4,rm
));
1537 return disAMode_copy2tmp(
1538 handleSegOverride(sorb
,
1539 binop(Iop_Add32
,getIReg(4,rm
),mkU32(d
))));
1542 /* d32(%eax) ... d32(%edi), not including d32(%esp)
1543 --> GET %reg, t ; ADDL d8, t
1545 case 0x10: case 0x11: case 0x12: case 0x13:
1546 /* ! 14 */ case 0x15: case 0x16: case 0x17:
1547 { UChar rm
= toUChar(mod_reg_rm
& 7);
1548 UInt d
= getUDisp32(delta
);
1549 DIS(buf
, "%s0x%x(%s)", sorbTxt(sorb
), d
, nameIReg(4,rm
));
1551 return disAMode_copy2tmp(
1552 handleSegOverride(sorb
,
1553 binop(Iop_Add32
,getIReg(4,rm
),mkU32(d
))));
1556 /* a register, %eax .. %edi. This shouldn't happen. */
1557 case 0x18: case 0x19: case 0x1A: case 0x1B:
1558 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
1559 vpanic("disAMode(x86): not an addr!");
1561 /* a 32-bit literal address
1565 { UInt d
= getUDisp32(delta
);
1567 DIS(buf
, "%s(0x%x)", sorbTxt(sorb
), d
);
1568 return disAMode_copy2tmp(
1569 handleSegOverride(sorb
, mkU32(d
)));
1573 /* SIB, with no displacement. Special cases:
1574 -- %esp cannot act as an index value.
1575 If index_r indicates %esp, zero is used for the index.
1576 -- when mod is zero and base indicates EBP, base is instead
1578 It's all madness, I tell you. Extract %index, %base and
1579 scale from the SIB byte. The value denoted is then:
1580 | %index == %ESP && %base == %EBP
1581 = d32 following SIB byte
1582 | %index == %ESP && %base != %EBP
1584 | %index != %ESP && %base == %EBP
1585 = d32 following SIB byte + (%index << scale)
1586 | %index != %ESP && %base != %ESP
1587 = %base + (%index << scale)
1589 What happens to the souls of CPU architects who dream up such
1590 horrendous schemes, do you suppose?
1592 UChar sib
= getIByte(delta
);
1593 UChar scale
= toUChar((sib
>> 6) & 3);
1594 UChar index_r
= toUChar((sib
>> 3) & 7);
1595 UChar base_r
= toUChar(sib
& 7);
1598 if (index_r
!= R_ESP
&& base_r
!= R_EBP
) {
1599 DIS(buf
, "%s(%s,%s,%d)", sorbTxt(sorb
),
1600 nameIReg(4,base_r
), nameIReg(4,index_r
), 1<<scale
);
1604 handleSegOverride(sorb
,
1607 binop(Iop_Shl32
, getIReg(4,index_r
),
1611 if (index_r
!= R_ESP
&& base_r
== R_EBP
) {
1612 UInt d
= getUDisp32(delta
);
1613 DIS(buf
, "%s0x%x(,%s,%d)", sorbTxt(sorb
), d
,
1614 nameIReg(4,index_r
), 1<<scale
);
1618 handleSegOverride(sorb
,
1620 binop(Iop_Shl32
, getIReg(4,index_r
), mkU8(scale
)),
1624 if (index_r
== R_ESP
&& base_r
!= R_EBP
) {
1625 DIS(buf
, "%s(%s,,)", sorbTxt(sorb
), nameIReg(4,base_r
));
1627 return disAMode_copy2tmp(
1628 handleSegOverride(sorb
, getIReg(4,base_r
)));
1631 if (index_r
== R_ESP
&& base_r
== R_EBP
) {
1632 UInt d
= getUDisp32(delta
);
1633 DIS(buf
, "%s0x%x(,,)", sorbTxt(sorb
), d
);
1635 return disAMode_copy2tmp(
1636 handleSegOverride(sorb
, mkU32(d
)));
1642 /* SIB, with 8-bit displacement. Special cases:
1643 -- %esp cannot act as an index value.
1644 If index_r indicates %esp, zero is used for the index.
1649 = d8 + %base + (%index << scale)
1652 UChar sib
= getIByte(delta
);
1653 UChar scale
= toUChar((sib
>> 6) & 3);
1654 UChar index_r
= toUChar((sib
>> 3) & 7);
1655 UChar base_r
= toUChar(sib
& 7);
1656 UInt d
= getSDisp8(delta
+1);
1658 if (index_r
== R_ESP
) {
1659 DIS(buf
, "%s%d(%s,,)", sorbTxt(sorb
),
1660 (Int
)d
, nameIReg(4,base_r
));
1662 return disAMode_copy2tmp(
1663 handleSegOverride(sorb
,
1664 binop(Iop_Add32
, getIReg(4,base_r
), mkU32(d
)) ));
1666 DIS(buf
, "%s%d(%s,%s,%d)", sorbTxt(sorb
), (Int
)d
,
1667 nameIReg(4,base_r
), nameIReg(4,index_r
), 1<<scale
);
1671 handleSegOverride(sorb
,
1676 getIReg(4,index_r
), mkU8(scale
))),
1683 /* SIB, with 32-bit displacement. Special cases:
1684 -- %esp cannot act as an index value.
1685 If index_r indicates %esp, zero is used for the index.
1690 = d32 + %base + (%index << scale)
1693 UChar sib
= getIByte(delta
);
1694 UChar scale
= toUChar((sib
>> 6) & 3);
1695 UChar index_r
= toUChar((sib
>> 3) & 7);
1696 UChar base_r
= toUChar(sib
& 7);
1697 UInt d
= getUDisp32(delta
+1);
1699 if (index_r
== R_ESP
) {
1700 DIS(buf
, "%s%d(%s,,)", sorbTxt(sorb
),
1701 (Int
)d
, nameIReg(4,base_r
));
1703 return disAMode_copy2tmp(
1704 handleSegOverride(sorb
,
1705 binop(Iop_Add32
, getIReg(4,base_r
), mkU32(d
)) ));
1707 DIS(buf
, "%s%d(%s,%s,%d)", sorbTxt(sorb
), (Int
)d
,
1708 nameIReg(4,base_r
), nameIReg(4,index_r
), 1<<scale
);
1712 handleSegOverride(sorb
,
1717 getIReg(4,index_r
), mkU8(scale
))),
1725 vpanic("disAMode(x86)");
1726 return 0; /*notreached*/
1731 /* Figure out the number of (insn-stream) bytes constituting the amode
1732 beginning at delta. Is useful for getting hold of literals beyond
1733 the end of the amode before it has been disassembled. */
1735 static UInt
lengthAMode ( Int delta
)
1737 UChar mod_reg_rm
= getIByte(delta
); delta
++;
1739 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
1740 jump table seems a bit excessive.
1742 mod_reg_rm
&= 0xC7; /* is now XX000YYY */
1743 mod_reg_rm
= toUChar(mod_reg_rm
| (mod_reg_rm
>> 3));
1744 /* is now XX0XXYYY */
1745 mod_reg_rm
&= 0x1F; /* is now 000XXYYY */
1746 switch (mod_reg_rm
) {
1748 /* (%eax) .. (%edi), not including (%esp) or (%ebp). */
1749 case 0x00: case 0x01: case 0x02: case 0x03:
1750 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
1753 /* d8(%eax) ... d8(%edi), not including d8(%esp). */
1754 case 0x08: case 0x09: case 0x0A: case 0x0B:
1755 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
1758 /* d32(%eax) ... d32(%edi), not including d32(%esp). */
1759 case 0x10: case 0x11: case 0x12: case 0x13:
1760 /* ! 14 */ case 0x15: case 0x16: case 0x17:
1763 /* a register, %eax .. %edi. (Not an addr, but still handled.) */
1764 case 0x18: case 0x19: case 0x1A: case 0x1B:
1765 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
1768 /* a 32-bit literal address. */
1769 case 0x05: return 5;
1771 /* SIB, no displacement. */
1773 UChar sib
= getIByte(delta
);
1774 UChar base_r
= toUChar(sib
& 7);
1775 if (base_r
== R_EBP
) return 6; else return 2;
1777 /* SIB, with 8-bit displacement. */
1778 case 0x0C: return 3;
1780 /* SIB, with 32-bit displacement. */
1781 case 0x14: return 6;
1784 vpanic("lengthAMode");
1785 return 0; /*notreached*/
1789 /*------------------------------------------------------------*/
1790 /*--- Disassembling common idioms ---*/
1791 /*------------------------------------------------------------*/
1793 /* Handle binary integer instructions of the form
1796 Is passed the a ptr to the modRM byte, the actual operation, and the
1797 data size. Returns the address advanced completely over this
1800 E(src) is reg-or-mem
1803 If E is reg, --> GET %G, tmp
1807 If E is mem and OP is not reversible,
1808 --> (getAddr E) -> tmpa
1814 If E is mem and OP is reversible
1815 --> (getAddr E) -> tmpa
1821 UInt
dis_op2_E_G ( UChar sorb
,
1827 const HChar
* t_x86opc
)
1831 IRType ty
= szToITy(size
);
1832 IRTemp dst1
= newTemp(ty
);
1833 IRTemp src
= newTemp(ty
);
1834 IRTemp dst0
= newTemp(ty
);
1835 UChar rm
= getUChar(delta0
);
1836 IRTemp addr
= IRTemp_INVALID
;
1838 /* addSubCarry == True indicates the intended operation is
1839 add-with-carry or subtract-with-borrow. */
1841 vassert(op8
== Iop_Add8
|| op8
== Iop_Sub8
);
1845 if (epartIsReg(rm
)) {
1846 /* Specially handle XOR reg,reg, because that doesn't really
1847 depend on reg, and doing the obvious thing potentially
1848 generates a spurious value check failure due to the bogus
1849 dependency. Ditto SBB reg,reg. */
1850 if ((op8
== Iop_Xor8
|| (op8
== Iop_Sub8
&& addSubCarry
))
1851 && gregOfRM(rm
) == eregOfRM(rm
)) {
1852 putIReg(size
, gregOfRM(rm
), mkU(ty
,0));
1854 assign( dst0
, getIReg(size
,gregOfRM(rm
)) );
1855 assign( src
, getIReg(size
,eregOfRM(rm
)) );
1857 if (addSubCarry
&& op8
== Iop_Add8
) {
1858 helper_ADC( size
, dst1
, dst0
, src
,
1859 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1860 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1862 if (addSubCarry
&& op8
== Iop_Sub8
) {
1863 helper_SBB( size
, dst1
, dst0
, src
,
1864 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1865 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1867 assign( dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
1869 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
1871 setFlags_DEP1(op8
, dst1
, ty
);
1873 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1876 DIP("%s%c %s,%s\n", t_x86opc
, nameISize(size
),
1877 nameIReg(size
,eregOfRM(rm
)),
1878 nameIReg(size
,gregOfRM(rm
)));
1881 /* E refers to memory */
1882 addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
1883 assign( dst0
, getIReg(size
,gregOfRM(rm
)) );
1884 assign( src
, loadLE(szToITy(size
), mkexpr(addr
)) );
1886 if (addSubCarry
&& op8
== Iop_Add8
) {
1887 helper_ADC( size
, dst1
, dst0
, src
,
1888 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1889 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1891 if (addSubCarry
&& op8
== Iop_Sub8
) {
1892 helper_SBB( size
, dst1
, dst0
, src
,
1893 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1894 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1896 assign( dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
1898 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
1900 setFlags_DEP1(op8
, dst1
, ty
);
1902 putIReg(size
, gregOfRM(rm
), mkexpr(dst1
));
1905 DIP("%s%c %s,%s\n", t_x86opc
, nameISize(size
),
1906 dis_buf
,nameIReg(size
,gregOfRM(rm
)));
1913 /* Handle binary integer instructions of the form
1916 Is passed the a ptr to the modRM byte, the actual operation, and the
1917 data size. Returns the address advanced completely over this
1921 E(dst) is reg-or-mem
1923 If E is reg, --> GET %E, tmp
1927 If E is mem, --> (getAddr E) -> tmpa
1933 UInt
dis_op2_G_E ( UChar sorb
,
1940 const HChar
* t_x86opc
)
1944 IRType ty
= szToITy(size
);
1945 IRTemp dst1
= newTemp(ty
);
1946 IRTemp src
= newTemp(ty
);
1947 IRTemp dst0
= newTemp(ty
);
1948 UChar rm
= getIByte(delta0
);
1949 IRTemp addr
= IRTemp_INVALID
;
1951 /* addSubCarry == True indicates the intended operation is
1952 add-with-carry or subtract-with-borrow. */
1954 vassert(op8
== Iop_Add8
|| op8
== Iop_Sub8
);
1958 if (epartIsReg(rm
)) {
1959 /* Specially handle XOR reg,reg, because that doesn't really
1960 depend on reg, and doing the obvious thing potentially
1961 generates a spurious value check failure due to the bogus
1962 dependency. Ditto SBB reg,reg.*/
1963 if ((op8
== Iop_Xor8
|| (op8
== Iop_Sub8
&& addSubCarry
))
1964 && gregOfRM(rm
) == eregOfRM(rm
)) {
1965 putIReg(size
, eregOfRM(rm
), mkU(ty
,0));
1967 assign(dst0
, getIReg(size
,eregOfRM(rm
)));
1968 assign(src
, getIReg(size
,gregOfRM(rm
)));
1970 if (addSubCarry
&& op8
== Iop_Add8
) {
1971 helper_ADC( size
, dst1
, dst0
, src
,
1972 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1973 putIReg(size
, eregOfRM(rm
), mkexpr(dst1
));
1975 if (addSubCarry
&& op8
== Iop_Sub8
) {
1976 helper_SBB( size
, dst1
, dst0
, src
,
1977 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
1978 putIReg(size
, eregOfRM(rm
), mkexpr(dst1
));
1980 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
1982 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
1984 setFlags_DEP1(op8
, dst1
, ty
);
1986 putIReg(size
, eregOfRM(rm
), mkexpr(dst1
));
1989 DIP("%s%c %s,%s\n", t_x86opc
, nameISize(size
),
1990 nameIReg(size
,gregOfRM(rm
)),
1991 nameIReg(size
,eregOfRM(rm
)));
1995 /* E refers to memory */
1997 addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
1998 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
1999 assign(src
, getIReg(size
,gregOfRM(rm
)));
2001 if (addSubCarry
&& op8
== Iop_Add8
) {
2003 /* cas-style store */
2004 helper_ADC( size
, dst1
, dst0
, src
,
2005 /*store*/addr
, dst0
/*expVal*/, guest_EIP_curr_instr
);
2008 helper_ADC( size
, dst1
, dst0
, src
,
2009 /*store*/addr
, IRTemp_INVALID
, 0 );
2012 if (addSubCarry
&& op8
== Iop_Sub8
) {
2014 /* cas-style store */
2015 helper_SBB( size
, dst1
, dst0
, src
,
2016 /*store*/addr
, dst0
/*expVal*/, guest_EIP_curr_instr
);
2019 helper_SBB( size
, dst1
, dst0
, src
,
2020 /*store*/addr
, IRTemp_INVALID
, 0 );
2023 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
2026 if (0) vex_printf("locked case\n" );
2027 casLE( mkexpr(addr
),
2028 mkexpr(dst0
)/*expval*/,
2029 mkexpr(dst1
)/*newval*/, guest_EIP_curr_instr
);
2031 if (0) vex_printf("nonlocked case\n");
2032 storeLE(mkexpr(addr
), mkexpr(dst1
));
2036 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
2038 setFlags_DEP1(op8
, dst1
, ty
);
2041 DIP("%s%c %s,%s\n", t_x86opc
, nameISize(size
),
2042 nameIReg(size
,gregOfRM(rm
)), dis_buf
);
2048 /* Handle move instructions of the form
2051 Is passed the a ptr to the modRM byte, and the data size. Returns
2052 the address advanced completely over this instruction.
2054 E(src) is reg-or-mem
2057 If E is reg, --> GET %E, tmpv
2060 If E is mem --> (getAddr E) -> tmpa
2065 UInt
dis_mov_E_G ( UChar sorb
,
2070 UChar rm
= getIByte(delta0
);
2073 if (epartIsReg(rm
)) {
2074 putIReg(size
, gregOfRM(rm
), getIReg(size
, eregOfRM(rm
)));
2075 DIP("mov%c %s,%s\n", nameISize(size
),
2076 nameIReg(size
,eregOfRM(rm
)),
2077 nameIReg(size
,gregOfRM(rm
)));
2081 /* E refers to memory */
2083 IRTemp addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
2084 putIReg(size
, gregOfRM(rm
), loadLE(szToITy(size
), mkexpr(addr
)));
2085 DIP("mov%c %s,%s\n", nameISize(size
),
2086 dis_buf
,nameIReg(size
,gregOfRM(rm
)));
2092 /* Handle move instructions of the form
2095 Is passed the a ptr to the modRM byte, and the data size. Returns
2096 the address advanced completely over this instruction.
2099 E(dst) is reg-or-mem
2101 If E is reg, --> GET %G, tmp
2104 If E is mem, --> (getAddr E) -> tmpa
2109 UInt
dis_mov_G_E ( UChar sorb
,
2114 UChar rm
= getIByte(delta0
);
2117 if (epartIsReg(rm
)) {
2118 putIReg(size
, eregOfRM(rm
), getIReg(size
, gregOfRM(rm
)));
2119 DIP("mov%c %s,%s\n", nameISize(size
),
2120 nameIReg(size
,gregOfRM(rm
)),
2121 nameIReg(size
,eregOfRM(rm
)));
2125 /* E refers to memory */
2127 IRTemp addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
2128 storeLE( mkexpr(addr
), getIReg(size
, gregOfRM(rm
)) );
2129 DIP("mov%c %s,%s\n", nameISize(size
),
2130 nameIReg(size
,gregOfRM(rm
)), dis_buf
);
2136 /* op $immediate, AL/AX/EAX. */
2138 UInt
dis_op_imm_A ( Int size
,
2143 const HChar
* t_x86opc
)
2145 IRType ty
= szToITy(size
);
2146 IRTemp dst0
= newTemp(ty
);
2147 IRTemp src
= newTemp(ty
);
2148 IRTemp dst1
= newTemp(ty
);
2149 UInt lit
= getUDisp(size
,delta
);
2150 assign(dst0
, getIReg(size
,R_EAX
));
2151 assign(src
, mkU(ty
,lit
));
2153 if (isAddSub(op8
) && !carrying
) {
2154 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
2155 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
2160 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
2161 setFlags_DEP1(op8
, dst1
, ty
);
2164 if (op8
== Iop_Add8
&& carrying
) {
2165 helper_ADC( size
, dst1
, dst0
, src
,
2166 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
2169 if (op8
== Iop_Sub8
&& carrying
) {
2170 helper_SBB( size
, dst1
, dst0
, src
,
2171 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
2174 vpanic("dis_op_imm_A(x86,guest)");
2177 putIReg(size
, R_EAX
, mkexpr(dst1
));
2179 DIP("%s%c $0x%x, %s\n", t_x86opc
, nameISize(size
),
2180 lit
, nameIReg(size
,R_EAX
));
2185 /* Sign- and Zero-extending moves. */
2187 UInt
dis_movx_E_G ( UChar sorb
,
2188 Int delta
, Int szs
, Int szd
, Bool sign_extend
)
2190 UChar rm
= getIByte(delta
);
2191 if (epartIsReg(rm
)) {
2193 // mutant case. See #250799
2194 putIReg(szd
, gregOfRM(rm
),
2195 getIReg(szs
,eregOfRM(rm
)));
2198 putIReg(szd
, gregOfRM(rm
),
2199 unop(mkWidenOp(szs
,szd
,sign_extend
),
2200 getIReg(szs
,eregOfRM(rm
))));
2202 DIP("mov%c%c%c %s,%s\n", sign_extend
? 's' : 'z',
2203 nameISize(szs
), nameISize(szd
),
2204 nameIReg(szs
,eregOfRM(rm
)),
2205 nameIReg(szd
,gregOfRM(rm
)));
2209 /* E refers to memory */
2213 IRTemp addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
2215 // mutant case. See #250799
2216 putIReg(szd
, gregOfRM(rm
),
2217 loadLE(szToITy(szs
),mkexpr(addr
)));
2220 putIReg(szd
, gregOfRM(rm
),
2221 unop(mkWidenOp(szs
,szd
,sign_extend
),
2222 loadLE(szToITy(szs
),mkexpr(addr
))));
2224 DIP("mov%c%c%c %s,%s\n", sign_extend
? 's' : 'z',
2225 nameISize(szs
), nameISize(szd
),
2226 dis_buf
, nameIReg(szd
,gregOfRM(rm
)));
2232 /* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 /
2233 16 / 8 bit quantity in the given IRTemp. */
2235 void codegen_div ( Int sz
, IRTemp t
, Bool signed_divide
)
2237 IROp op
= signed_divide
? Iop_DivModS64to32
: Iop_DivModU64to32
;
2238 IRTemp src64
= newTemp(Ity_I64
);
2239 IRTemp dst64
= newTemp(Ity_I64
);
2242 assign( src64
, binop(Iop_32HLto64
,
2243 getIReg(4,R_EDX
), getIReg(4,R_EAX
)) );
2244 assign( dst64
, binop(op
, mkexpr(src64
), mkexpr(t
)) );
2245 putIReg( 4, R_EAX
, unop(Iop_64to32
,mkexpr(dst64
)) );
2246 putIReg( 4, R_EDX
, unop(Iop_64HIto32
,mkexpr(dst64
)) );
2249 IROp widen3264
= signed_divide
? Iop_32Sto64
: Iop_32Uto64
;
2250 IROp widen1632
= signed_divide
? Iop_16Sto32
: Iop_16Uto32
;
2251 assign( src64
, unop(widen3264
,
2253 getIReg(2,R_EDX
), getIReg(2,R_EAX
))) );
2254 assign( dst64
, binop(op
, mkexpr(src64
), unop(widen1632
,mkexpr(t
))) );
2255 putIReg( 2, R_EAX
, unop(Iop_32to16
,unop(Iop_64to32
,mkexpr(dst64
))) );
2256 putIReg( 2, R_EDX
, unop(Iop_32to16
,unop(Iop_64HIto32
,mkexpr(dst64
))) );
2260 IROp widen3264
= signed_divide
? Iop_32Sto64
: Iop_32Uto64
;
2261 IROp widen1632
= signed_divide
? Iop_16Sto32
: Iop_16Uto32
;
2262 IROp widen816
= signed_divide
? Iop_8Sto16
: Iop_8Uto16
;
2263 assign( src64
, unop(widen3264
, unop(widen1632
, getIReg(2,R_EAX
))) );
2265 binop(op
, mkexpr(src64
),
2266 unop(widen1632
, unop(widen816
, mkexpr(t
)))) );
2267 putIReg( 1, R_AL
, unop(Iop_16to8
, unop(Iop_32to16
,
2268 unop(Iop_64to32
,mkexpr(dst64
)))) );
2269 putIReg( 1, R_AH
, unop(Iop_16to8
, unop(Iop_32to16
,
2270 unop(Iop_64HIto32
,mkexpr(dst64
)))) );
2273 default: vpanic("codegen_div(x86)");
2279 UInt
dis_Grp1 ( UChar sorb
, Bool locked
,
2280 Int delta
, UChar modrm
,
2281 Int am_sz
, Int d_sz
, Int sz
, UInt d32
)
2285 IRType ty
= szToITy(sz
);
2286 IRTemp dst1
= newTemp(ty
);
2287 IRTemp src
= newTemp(ty
);
2288 IRTemp dst0
= newTemp(ty
);
2289 IRTemp addr
= IRTemp_INVALID
;
2290 IROp op8
= Iop_INVALID
;
2291 UInt mask
= sz
==1 ? 0xFF : (sz
==2 ? 0xFFFF : 0xFFFFFFFF);
2293 switch (gregOfRM(modrm
)) {
2294 case 0: op8
= Iop_Add8
; break; case 1: op8
= Iop_Or8
; break;
2295 case 2: break; // ADC
2296 case 3: break; // SBB
2297 case 4: op8
= Iop_And8
; break; case 5: op8
= Iop_Sub8
; break;
2298 case 6: op8
= Iop_Xor8
; break; case 7: op8
= Iop_Sub8
; break;
2300 default: vpanic("dis_Grp1: unhandled case");
2303 if (epartIsReg(modrm
)) {
2304 vassert(am_sz
== 1);
2306 assign(dst0
, getIReg(sz
,eregOfRM(modrm
)));
2307 assign(src
, mkU(ty
,d32
& mask
));
2309 if (gregOfRM(modrm
) == 2 /* ADC */) {
2310 helper_ADC( sz
, dst1
, dst0
, src
,
2311 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
2313 if (gregOfRM(modrm
) == 3 /* SBB */) {
2314 helper_SBB( sz
, dst1
, dst0
, src
,
2315 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
2317 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
2319 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
2321 setFlags_DEP1(op8
, dst1
, ty
);
2324 if (gregOfRM(modrm
) < 7)
2325 putIReg(sz
, eregOfRM(modrm
), mkexpr(dst1
));
2327 delta
+= (am_sz
+ d_sz
);
2328 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm
)), nameISize(sz
), d32
,
2329 nameIReg(sz
,eregOfRM(modrm
)));
2331 addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
2333 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
2334 assign(src
, mkU(ty
,d32
& mask
));
2336 if (gregOfRM(modrm
) == 2 /* ADC */) {
2338 /* cas-style store */
2339 helper_ADC( sz
, dst1
, dst0
, src
,
2340 /*store*/addr
, dst0
/*expVal*/, guest_EIP_curr_instr
);
2343 helper_ADC( sz
, dst1
, dst0
, src
,
2344 /*store*/addr
, IRTemp_INVALID
, 0 );
2347 if (gregOfRM(modrm
) == 3 /* SBB */) {
2349 /* cas-style store */
2350 helper_SBB( sz
, dst1
, dst0
, src
,
2351 /*store*/addr
, dst0
/*expVal*/, guest_EIP_curr_instr
);
2354 helper_SBB( sz
, dst1
, dst0
, src
,
2355 /*store*/addr
, IRTemp_INVALID
, 0 );
2358 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
2359 if (gregOfRM(modrm
) < 7) {
2361 casLE( mkexpr(addr
), mkexpr(dst0
)/*expVal*/,
2362 mkexpr(dst1
)/*newVal*/,
2363 guest_EIP_curr_instr
);
2365 storeLE(mkexpr(addr
), mkexpr(dst1
));
2369 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
2371 setFlags_DEP1(op8
, dst1
, ty
);
2374 delta
+= (len
+d_sz
);
2375 DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm
)), nameISize(sz
),
2382 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
2386 UInt
dis_Grp2 ( UChar sorb
,
2387 Int delta
, UChar modrm
,
2388 Int am_sz
, Int d_sz
, Int sz
, IRExpr
* shift_expr
,
2389 const HChar
* shift_expr_txt
, Bool
* decode_OK
)
2391 /* delta on entry points at the modrm byte. */
2394 Bool isShift
, isRotate
, isRotateC
;
2395 IRType ty
= szToITy(sz
);
2396 IRTemp dst0
= newTemp(ty
);
2397 IRTemp dst1
= newTemp(ty
);
2398 IRTemp addr
= IRTemp_INVALID
;
2402 vassert(sz
== 1 || sz
== 2 || sz
== 4);
2404 /* Put value to shift/rotate in dst0. */
2405 if (epartIsReg(modrm
)) {
2406 assign(dst0
, getIReg(sz
, eregOfRM(modrm
)));
2407 delta
+= (am_sz
+ d_sz
);
2409 addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
2410 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
2411 delta
+= len
+ d_sz
;
2415 switch (gregOfRM(modrm
)) { case 4: case 5: case 6: case 7: isShift
= True
; }
2418 switch (gregOfRM(modrm
)) { case 0: case 1: isRotate
= True
; }
2421 switch (gregOfRM(modrm
)) { case 2: case 3: isRotateC
= True
; }
2423 if (!isShift
&& !isRotate
&& !isRotateC
) {
2425 vpanic("dis_Grp2(Reg): unhandled case(x86)");
2429 /* call a helper; these insns are so ridiculous they do not
2431 Bool left
= toBool(gregOfRM(modrm
) == 2);
2432 IRTemp r64
= newTemp(Ity_I64
);
2434 = mkIRExprVec_4( widenUto32(mkexpr(dst0
)), /* thing to rotate */
2435 widenUto32(shift_expr
), /* rotate amount */
2436 widenUto32(mk_x86g_calculate_eflags_all()),
2438 assign( r64
, mkIRExprCCall(
2441 left
? "x86g_calculate_RCL" : "x86g_calculate_RCR",
2442 left
? &x86g_calculate_RCL
: &x86g_calculate_RCR
,
2446 /* new eflags in hi half r64; new value in lo half r64 */
2447 assign( dst1
, narrowTo(ty
, unop(Iop_64to32
, mkexpr(r64
))) );
2448 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
2449 stmt( IRStmt_Put( OFFB_CC_DEP1
, unop(Iop_64HIto32
, mkexpr(r64
)) ));
2450 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
2451 /* Set NDEP even though it isn't used. This makes redundant-PUT
2452 elimination of previous stores to this field work better. */
2453 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
2458 IRTemp pre32
= newTemp(Ity_I32
);
2459 IRTemp res32
= newTemp(Ity_I32
);
2460 IRTemp res32ss
= newTemp(Ity_I32
);
2461 IRTemp shift_amt
= newTemp(Ity_I8
);
2464 switch (gregOfRM(modrm
)) {
2465 case 4: op32
= Iop_Shl32
; break;
2466 case 5: op32
= Iop_Shr32
; break;
2467 case 6: op32
= Iop_Shl32
; break;
2468 case 7: op32
= Iop_Sar32
; break;
2470 default: vpanic("dis_Grp2:shift"); break;
2473 /* Widen the value to be shifted to 32 bits, do the shift, and
2474 narrow back down. This seems surprisingly long-winded, but
2475 unfortunately the Intel semantics requires that 8/16-bit
2476 shifts give defined results for shift values all the way up
2477 to 31, and this seems the simplest way to do it. It has the
2478 advantage that the only IR level shifts generated are of 32
2479 bit values, and the shift amount is guaranteed to be in the
2480 range 0 .. 31, thereby observing the IR semantics requiring
2481 all shift values to be in the range 0 .. 2^word_size-1. */
2483 /* shift_amt = shift_expr & 31, regardless of operation size */
2484 assign( shift_amt
, binop(Iop_And8
, shift_expr
, mkU8(31)) );
2486 /* suitably widen the value to be shifted to 32 bits. */
2487 assign( pre32
, op32
==Iop_Sar32
? widenSto32(mkexpr(dst0
))
2488 : widenUto32(mkexpr(dst0
)) );
2490 /* res32 = pre32 `shift` shift_amt */
2491 assign( res32
, binop(op32
, mkexpr(pre32
), mkexpr(shift_amt
)) );
2493 /* res32ss = pre32 `shift` ((shift_amt - 1) & 31) */
2499 mkexpr(shift_amt
), mkU8(1)),
2502 /* Build the flags thunk. */
2503 setFlags_DEP1_DEP2_shift(op32
, res32
, res32ss
, ty
, shift_amt
);
2505 /* Narrow the result back down. */
2506 assign( dst1
, narrowTo(ty
, mkexpr(res32
)) );
2508 } /* if (isShift) */
2512 Int ccOp
= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1 : 2);
2513 Bool left
= toBool(gregOfRM(modrm
) == 0);
2514 IRTemp rot_amt
= newTemp(Ity_I8
);
2515 IRTemp rot_amt32
= newTemp(Ity_I8
);
2516 IRTemp oldFlags
= newTemp(Ity_I32
);
2518 /* rot_amt = shift_expr & mask */
2519 /* By masking the rotate amount thusly, the IR-level Shl/Shr
2520 expressions never shift beyond the word size and thus remain
2522 assign(rot_amt32
, binop(Iop_And8
, shift_expr
, mkU8(31)));
2525 assign(rot_amt
, mkexpr(rot_amt32
));
2527 assign(rot_amt
, binop(Iop_And8
, mkexpr(rot_amt32
), mkU8(8*sz
-1)));
2531 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
2533 binop( mkSizedOp(ty
,Iop_Or8
),
2534 binop( mkSizedOp(ty
,Iop_Shl8
),
2538 binop( mkSizedOp(ty
,Iop_Shr8
),
2540 binop(Iop_Sub8
,mkU8(8*sz
), mkexpr(rot_amt
))
2544 ccOp
+= X86G_CC_OP_ROLB
;
2546 } else { /* right */
2548 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
2550 binop( mkSizedOp(ty
,Iop_Or8
),
2551 binop( mkSizedOp(ty
,Iop_Shr8
),
2555 binop( mkSizedOp(ty
,Iop_Shl8
),
2557 binop(Iop_Sub8
,mkU8(8*sz
), mkexpr(rot_amt
))
2561 ccOp
+= X86G_CC_OP_RORB
;
2565 /* dst1 now holds the rotated value. Build flag thunk. We
2566 need the resulting value for this, and the previous flags.
2567 Except don't set it if the rotate count is zero. */
2569 assign(oldFlags
, mk_x86g_calculate_eflags_all());
2571 /* rot_amt32 :: Ity_I8. We need to convert it to I1. */
2572 IRTemp rot_amt32b
= newTemp(Ity_I1
);
2573 assign(rot_amt32b
, binop(Iop_CmpNE8
, mkexpr(rot_amt32
), mkU8(0)) );
2575 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
2576 stmt( IRStmt_Put( OFFB_CC_OP
,
2577 IRExpr_ITE( mkexpr(rot_amt32b
),
2579 IRExpr_Get(OFFB_CC_OP
,Ity_I32
) ) ));
2580 stmt( IRStmt_Put( OFFB_CC_DEP1
,
2581 IRExpr_ITE( mkexpr(rot_amt32b
),
2582 widenUto32(mkexpr(dst1
)),
2583 IRExpr_Get(OFFB_CC_DEP1
,Ity_I32
) ) ));
2584 stmt( IRStmt_Put( OFFB_CC_DEP2
,
2585 IRExpr_ITE( mkexpr(rot_amt32b
),
2587 IRExpr_Get(OFFB_CC_DEP2
,Ity_I32
) ) ));
2588 stmt( IRStmt_Put( OFFB_CC_NDEP
,
2589 IRExpr_ITE( mkexpr(rot_amt32b
),
2591 IRExpr_Get(OFFB_CC_NDEP
,Ity_I32
) ) ));
2592 } /* if (isRotate) */
2594 /* Save result, and finish up. */
2595 if (epartIsReg(modrm
)) {
2596 putIReg(sz
, eregOfRM(modrm
), mkexpr(dst1
));
2597 if (vex_traceflags
& VEX_TRACE_FE
) {
2599 nameGrp2(gregOfRM(modrm
)), nameISize(sz
) );
2601 vex_printf("%s", shift_expr_txt
);
2603 ppIRExpr(shift_expr
);
2604 vex_printf(", %s\n", nameIReg(sz
,eregOfRM(modrm
)));
2607 storeLE(mkexpr(addr
), mkexpr(dst1
));
2608 if (vex_traceflags
& VEX_TRACE_FE
) {
2610 nameGrp2(gregOfRM(modrm
)), nameISize(sz
) );
2612 vex_printf("%s", shift_expr_txt
);
2614 ppIRExpr(shift_expr
);
2615 vex_printf(", %s\n", dis_buf
);
2622 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
2624 UInt
dis_Grp8_Imm ( UChar sorb
,
2626 Int delta
, UChar modrm
,
2627 Int am_sz
, Int sz
, UInt src_val
,
2630 /* src_val denotes a d8.
2631 And delta on entry points at the modrm byte. */
2633 IRType ty
= szToITy(sz
);
2634 IRTemp t2
= newTemp(Ity_I32
);
2635 IRTemp t2m
= newTemp(Ity_I32
);
2636 IRTemp t_addr
= IRTemp_INVALID
;
2640 /* we're optimists :-) */
2643 /* Limit src_val -- the bit offset -- to something within a word.
2644 The Intel docs say that literal offsets larger than a word are
2645 masked in this way. */
2647 case 2: src_val
&= 15; break;
2648 case 4: src_val
&= 31; break;
2649 default: *decode_OK
= False
; return delta
;
2652 /* Invent a mask suitable for the operation. */
2653 switch (gregOfRM(modrm
)) {
2654 case 4: /* BT */ mask
= 0; break;
2655 case 5: /* BTS */ mask
= 1 << src_val
; break;
2656 case 6: /* BTR */ mask
= ~(1 << src_val
); break;
2657 case 7: /* BTC */ mask
= 1 << src_val
; break;
2658 /* If this needs to be extended, probably simplest to make a
2659 new function to handle the other cases (0 .. 3). The
2660 Intel docs do however not indicate any use for 0 .. 3, so
2661 we don't expect this to happen. */
2662 default: *decode_OK
= False
; return delta
;
2665 /* Fetch the value to be tested and modified into t2, which is
2666 32-bits wide regardless of sz. */
2667 if (epartIsReg(modrm
)) {
2668 vassert(am_sz
== 1);
2669 assign( t2
, widenUto32(getIReg(sz
, eregOfRM(modrm
))) );
2670 delta
+= (am_sz
+ 1);
2671 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm
)), nameISize(sz
),
2672 src_val
, nameIReg(sz
,eregOfRM(modrm
)));
2675 t_addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
2677 assign( t2
, widenUto32(loadLE(ty
, mkexpr(t_addr
))) );
2678 DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm
)), nameISize(sz
),
2682 /* Compute the new value into t2m, if non-BT. */
2683 switch (gregOfRM(modrm
)) {
2687 assign( t2m
, binop(Iop_Or32
, mkU32(mask
), mkexpr(t2
)) );
2690 assign( t2m
, binop(Iop_And32
, mkU32(mask
), mkexpr(t2
)) );
2693 assign( t2m
, binop(Iop_Xor32
, mkU32(mask
), mkexpr(t2
)) );
2696 /*NOTREACHED*/ /*the previous switch guards this*/
2700 /* Write the result back, if non-BT. If the CAS fails then we
2701 side-exit from the trace at this point, and so the flag state is
2702 not affected. This is of course as required. */
2703 if (gregOfRM(modrm
) != 4 /* BT */) {
2704 if (epartIsReg(modrm
)) {
2705 putIReg(sz
, eregOfRM(modrm
), narrowTo(ty
, mkexpr(t2m
)));
2708 casLE( mkexpr(t_addr
),
2709 narrowTo(ty
, mkexpr(t2
))/*expd*/,
2710 narrowTo(ty
, mkexpr(t2m
))/*new*/,
2711 guest_EIP_curr_instr
);
2713 storeLE(mkexpr(t_addr
), narrowTo(ty
, mkexpr(t2m
)));
2718 /* Copy relevant bit from t2 into the carry flag. */
2719 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
2720 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
2721 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
2725 binop(Iop_Shr32
, mkexpr(t2
), mkU8(src_val
)),
2728 /* Set NDEP even though it isn't used. This makes redundant-PUT
2729 elimination of previous stores to this field work better. */
2730 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
2736 /* Signed/unsigned widening multiply. Generate IR to multiply the
2737 value in EAX/AX/AL by the given IRTemp, and park the result in
2740 static void codegen_mulL_A_D ( Int sz
, Bool syned
,
2741 IRTemp tmp
, const HChar
* tmp_txt
)
2743 IRType ty
= szToITy(sz
);
2744 IRTemp t1
= newTemp(ty
);
2746 assign( t1
, getIReg(sz
, R_EAX
) );
2750 IRTemp res64
= newTemp(Ity_I64
);
2751 IRTemp resHi
= newTemp(Ity_I32
);
2752 IRTemp resLo
= newTemp(Ity_I32
);
2753 IROp mulOp
= syned
? Iop_MullS32
: Iop_MullU32
;
2754 UInt tBaseOp
= syned
? X86G_CC_OP_SMULB
: X86G_CC_OP_UMULB
;
2755 setFlags_MUL ( Ity_I32
, t1
, tmp
, tBaseOp
);
2756 assign( res64
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
2757 assign( resHi
, unop(Iop_64HIto32
,mkexpr(res64
)));
2758 assign( resLo
, unop(Iop_64to32
,mkexpr(res64
)));
2759 putIReg(4, R_EDX
, mkexpr(resHi
));
2760 putIReg(4, R_EAX
, mkexpr(resLo
));
2764 IRTemp res32
= newTemp(Ity_I32
);
2765 IRTemp resHi
= newTemp(Ity_I16
);
2766 IRTemp resLo
= newTemp(Ity_I16
);
2767 IROp mulOp
= syned
? Iop_MullS16
: Iop_MullU16
;
2768 UInt tBaseOp
= syned
? X86G_CC_OP_SMULB
: X86G_CC_OP_UMULB
;
2769 setFlags_MUL ( Ity_I16
, t1
, tmp
, tBaseOp
);
2770 assign( res32
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
2771 assign( resHi
, unop(Iop_32HIto16
,mkexpr(res32
)));
2772 assign( resLo
, unop(Iop_32to16
,mkexpr(res32
)));
2773 putIReg(2, R_EDX
, mkexpr(resHi
));
2774 putIReg(2, R_EAX
, mkexpr(resLo
));
2778 IRTemp res16
= newTemp(Ity_I16
);
2779 IRTemp resHi
= newTemp(Ity_I8
);
2780 IRTemp resLo
= newTemp(Ity_I8
);
2781 IROp mulOp
= syned
? Iop_MullS8
: Iop_MullU8
;
2782 UInt tBaseOp
= syned
? X86G_CC_OP_SMULB
: X86G_CC_OP_UMULB
;
2783 setFlags_MUL ( Ity_I8
, t1
, tmp
, tBaseOp
);
2784 assign( res16
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
2785 assign( resHi
, unop(Iop_16HIto8
,mkexpr(res16
)));
2786 assign( resLo
, unop(Iop_16to8
,mkexpr(res16
)));
2787 putIReg(2, R_EAX
, mkexpr(res16
));
2791 vpanic("codegen_mulL_A_D(x86)");
2793 DIP("%s%c %s\n", syned
? "imul" : "mul", nameISize(sz
), tmp_txt
);
2797 /* Group 3 extended opcodes. */
2799 UInt
dis_Grp3 ( UChar sorb
, Bool locked
, Int sz
, Int delta
, Bool
* decode_OK
)
2806 IRType ty
= szToITy(sz
);
2807 IRTemp t1
= newTemp(ty
);
2808 IRTemp dst1
, src
, dst0
;
2810 *decode_OK
= True
; /* may change this later */
2812 modrm
= getIByte(delta
);
2814 if (locked
&& (gregOfRM(modrm
) != 2 && gregOfRM(modrm
) != 3)) {
2815 /* LOCK prefix only allowed with not and neg subopcodes */
2820 if (epartIsReg(modrm
)) {
2821 switch (gregOfRM(modrm
)) {
2822 case 0: { /* TEST */
2823 delta
++; d32
= getUDisp(sz
, delta
); delta
+= sz
;
2825 assign(dst1
, binop(mkSizedOp(ty
,Iop_And8
),
2826 getIReg(sz
,eregOfRM(modrm
)),
2828 setFlags_DEP1( Iop_And8
, dst1
, ty
);
2829 DIP("test%c $0x%x, %s\n", nameISize(sz
), d32
,
2830 nameIReg(sz
, eregOfRM(modrm
)));
2833 case 1: /* UNDEFINED */
2834 /* The Intel docs imply this insn is undefined and binutils
2835 agrees. Unfortunately Core 2 will run it (with who
2836 knows what result?) sandpile.org reckons it's an alias
2837 for case 0. We play safe. */
2842 putIReg(sz
, eregOfRM(modrm
),
2843 unop(mkSizedOp(ty
,Iop_Not8
),
2844 getIReg(sz
, eregOfRM(modrm
))));
2845 DIP("not%c %s\n", nameISize(sz
), nameIReg(sz
, eregOfRM(modrm
)));
2852 assign(dst0
, mkU(ty
,0));
2853 assign(src
, getIReg(sz
,eregOfRM(modrm
)));
2854 assign(dst1
, binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(dst0
), mkexpr(src
)));
2855 setFlags_DEP1_DEP2(Iop_Sub8
, dst0
, src
, ty
);
2856 putIReg(sz
, eregOfRM(modrm
), mkexpr(dst1
));
2857 DIP("neg%c %s\n", nameISize(sz
), nameIReg(sz
, eregOfRM(modrm
)));
2859 case 4: /* MUL (unsigned widening) */
2862 assign(src
, getIReg(sz
,eregOfRM(modrm
)));
2863 codegen_mulL_A_D ( sz
, False
, src
, nameIReg(sz
,eregOfRM(modrm
)) );
2865 case 5: /* IMUL (signed widening) */
2868 assign(src
, getIReg(sz
,eregOfRM(modrm
)));
2869 codegen_mulL_A_D ( sz
, True
, src
, nameIReg(sz
,eregOfRM(modrm
)) );
2873 assign( t1
, getIReg(sz
, eregOfRM(modrm
)) );
2874 codegen_div ( sz
, t1
, False
);
2875 DIP("div%c %s\n", nameISize(sz
), nameIReg(sz
, eregOfRM(modrm
)));
2879 assign( t1
, getIReg(sz
, eregOfRM(modrm
)) );
2880 codegen_div ( sz
, t1
, True
);
2881 DIP("idiv%c %s\n", nameISize(sz
), nameIReg(sz
, eregOfRM(modrm
)));
2884 /* This can't happen - gregOfRM should return 0 .. 7 only */
2885 vpanic("Grp3(x86)");
2888 addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
2891 assign(t1
, loadLE(ty
,mkexpr(addr
)));
2892 switch (gregOfRM(modrm
)) {
2893 case 0: { /* TEST */
2894 d32
= getUDisp(sz
, delta
); delta
+= sz
;
2896 assign(dst1
, binop(mkSizedOp(ty
,Iop_And8
),
2897 mkexpr(t1
), mkU(ty
,d32
)));
2898 setFlags_DEP1( Iop_And8
, dst1
, ty
);
2899 DIP("test%c $0x%x, %s\n", nameISize(sz
), d32
, dis_buf
);
2902 case 1: /* UNDEFINED */
2903 /* See comment above on R case */
2908 assign(dst1
, unop(mkSizedOp(ty
,Iop_Not8
), mkexpr(t1
)));
2910 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(dst1
)/*new*/,
2911 guest_EIP_curr_instr
);
2913 storeLE( mkexpr(addr
), mkexpr(dst1
) );
2915 DIP("not%c %s\n", nameISize(sz
), dis_buf
);
2921 assign(dst0
, mkU(ty
,0));
2922 assign(src
, mkexpr(t1
));
2923 assign(dst1
, binop(mkSizedOp(ty
,Iop_Sub8
),
2924 mkexpr(dst0
), mkexpr(src
)));
2926 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(dst1
)/*new*/,
2927 guest_EIP_curr_instr
);
2929 storeLE( mkexpr(addr
), mkexpr(dst1
) );
2931 setFlags_DEP1_DEP2(Iop_Sub8
, dst0
, src
, ty
);
2932 DIP("neg%c %s\n", nameISize(sz
), dis_buf
);
2935 codegen_mulL_A_D ( sz
, False
, t1
, dis_buf
);
2938 codegen_mulL_A_D ( sz
, True
, t1
, dis_buf
);
2941 codegen_div ( sz
, t1
, False
);
2942 DIP("div%c %s\n", nameISize(sz
), dis_buf
);
2945 codegen_div ( sz
, t1
, True
);
2946 DIP("idiv%c %s\n", nameISize(sz
), dis_buf
);
2949 /* This can't happen - gregOfRM should return 0 .. 7 only */
2950 vpanic("Grp3(x86)");
2957 /* Group 4 extended opcodes. */
2959 UInt
dis_Grp4 ( UChar sorb
, Bool locked
, Int delta
, Bool
* decode_OK
)
2965 IRTemp t1
= newTemp(ty
);
2966 IRTemp t2
= newTemp(ty
);
2970 modrm
= getIByte(delta
);
2972 if (locked
&& (gregOfRM(modrm
) != 0 && gregOfRM(modrm
) != 1)) {
2973 /* LOCK prefix only allowed with inc and dec subopcodes */
2978 if (epartIsReg(modrm
)) {
2979 assign(t1
, getIReg(1, eregOfRM(modrm
)));
2980 switch (gregOfRM(modrm
)) {
2982 assign(t2
, binop(Iop_Add8
, mkexpr(t1
), mkU8(1)));
2983 putIReg(1, eregOfRM(modrm
), mkexpr(t2
));
2984 setFlags_INC_DEC( True
, t2
, ty
);
2987 assign(t2
, binop(Iop_Sub8
, mkexpr(t1
), mkU8(1)));
2988 putIReg(1, eregOfRM(modrm
), mkexpr(t2
));
2989 setFlags_INC_DEC( False
, t2
, ty
);
2996 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm
)),
2997 nameIReg(1, eregOfRM(modrm
)));
2999 IRTemp addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
3000 assign( t1
, loadLE(ty
, mkexpr(addr
)) );
3001 switch (gregOfRM(modrm
)) {
3003 assign(t2
, binop(Iop_Add8
, mkexpr(t1
), mkU8(1)));
3005 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(t2
)/*new*/,
3006 guest_EIP_curr_instr
);
3008 storeLE( mkexpr(addr
), mkexpr(t2
) );
3010 setFlags_INC_DEC( True
, t2
, ty
);
3013 assign(t2
, binop(Iop_Sub8
, mkexpr(t1
), mkU8(1)));
3015 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(t2
)/*new*/,
3016 guest_EIP_curr_instr
);
3018 storeLE( mkexpr(addr
), mkexpr(t2
) );
3020 setFlags_INC_DEC( False
, t2
, ty
);
3027 DIP("%sb %s\n", nameGrp4(gregOfRM(modrm
)), dis_buf
);
3033 /* Group 5 extended opcodes. */
3035 UInt
dis_Grp5 ( UChar sorb
, Bool locked
, Int sz
, Int delta
,
3036 /*MOD*/DisResult
* dres
, /*OUT*/Bool
* decode_OK
)
3041 IRTemp addr
= IRTemp_INVALID
;
3042 IRType ty
= szToITy(sz
);
3043 IRTemp t1
= newTemp(ty
);
3044 IRTemp t2
= IRTemp_INVALID
;
3048 modrm
= getIByte(delta
);
3050 if (locked
&& (gregOfRM(modrm
) != 0 && gregOfRM(modrm
) != 1)) {
3051 /* LOCK prefix only allowed with inc and dec subopcodes */
3056 if (epartIsReg(modrm
)) {
3057 assign(t1
, getIReg(sz
,eregOfRM(modrm
)));
3058 switch (gregOfRM(modrm
)) {
3060 vassert(sz
== 2 || sz
== 4);
3062 assign(t2
, binop(mkSizedOp(ty
,Iop_Add8
),
3063 mkexpr(t1
), mkU(ty
,1)));
3064 setFlags_INC_DEC( True
, t2
, ty
);
3065 putIReg(sz
,eregOfRM(modrm
),mkexpr(t2
));
3068 vassert(sz
== 2 || sz
== 4);
3070 assign(t2
, binop(mkSizedOp(ty
,Iop_Sub8
),
3071 mkexpr(t1
), mkU(ty
,1)));
3072 setFlags_INC_DEC( False
, t2
, ty
);
3073 putIReg(sz
,eregOfRM(modrm
),mkexpr(t2
));
3075 case 2: /* call Ev */
3077 t2
= newTemp(Ity_I32
);
3078 assign(t2
, binop(Iop_Sub32
, getIReg(4,R_ESP
), mkU32(4)));
3079 putIReg(4, R_ESP
, mkexpr(t2
));
3080 storeLE( mkexpr(t2
), mkU32(guest_EIP_bbstart
+delta
+1));
3081 jmp_treg(dres
, Ijk_Call
, t1
);
3082 vassert(dres
->whatNext
== Dis_StopHere
);
3084 case 4: /* jmp Ev */
3086 jmp_treg(dres
, Ijk_Boring
, t1
);
3087 vassert(dres
->whatNext
== Dis_StopHere
);
3089 case 6: /* PUSH Ev */
3090 vassert(sz
== 4 || sz
== 2);
3091 t2
= newTemp(Ity_I32
);
3092 assign( t2
, binop(Iop_Sub32
,getIReg(4,R_ESP
),mkU32(sz
)) );
3093 putIReg(4, R_ESP
, mkexpr(t2
) );
3094 storeLE( mkexpr(t2
), mkexpr(t1
) );
3101 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm
)),
3102 nameISize(sz
), nameIReg(sz
, eregOfRM(modrm
)));
3104 addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
3105 assign(t1
, loadLE(ty
,mkexpr(addr
)));
3106 switch (gregOfRM(modrm
)) {
3109 assign(t2
, binop(mkSizedOp(ty
,Iop_Add8
),
3110 mkexpr(t1
), mkU(ty
,1)));
3112 casLE( mkexpr(addr
),
3113 mkexpr(t1
), mkexpr(t2
), guest_EIP_curr_instr
);
3115 storeLE(mkexpr(addr
),mkexpr(t2
));
3117 setFlags_INC_DEC( True
, t2
, ty
);
3121 assign(t2
, binop(mkSizedOp(ty
,Iop_Sub8
),
3122 mkexpr(t1
), mkU(ty
,1)));
3124 casLE( mkexpr(addr
),
3125 mkexpr(t1
), mkexpr(t2
), guest_EIP_curr_instr
);
3127 storeLE(mkexpr(addr
),mkexpr(t2
));
3129 setFlags_INC_DEC( False
, t2
, ty
);
3131 case 2: /* call Ev */
3133 t2
= newTemp(Ity_I32
);
3134 assign(t2
, binop(Iop_Sub32
, getIReg(4,R_ESP
), mkU32(4)));
3135 putIReg(4, R_ESP
, mkexpr(t2
));
3136 storeLE( mkexpr(t2
), mkU32(guest_EIP_bbstart
+delta
+len
));
3137 jmp_treg(dres
, Ijk_Call
, t1
);
3138 vassert(dres
->whatNext
== Dis_StopHere
);
3140 case 4: /* JMP Ev */
3142 jmp_treg(dres
, Ijk_Boring
, t1
);
3143 vassert(dres
->whatNext
== Dis_StopHere
);
3145 case 6: /* PUSH Ev */
3146 vassert(sz
== 4 || sz
== 2);
3147 t2
= newTemp(Ity_I32
);
3148 assign( t2
, binop(Iop_Sub32
,getIReg(4,R_ESP
),mkU32(sz
)) );
3149 putIReg(4, R_ESP
, mkexpr(t2
) );
3150 storeLE( mkexpr(t2
), mkexpr(t1
) );
3157 DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm
)),
3158 nameISize(sz
), dis_buf
);
3164 /*------------------------------------------------------------*/
3165 /*--- Disassembling string ops (including REP prefixes) ---*/
3166 /*------------------------------------------------------------*/
3168 /* Code shared by all the string ops */
3170 void dis_string_op_increment(Int sz
, IRTemp t_inc
)
3172 if (sz
== 4 || sz
== 2) {
3174 binop(Iop_Shl32
, IRExpr_Get( OFFB_DFLAG
, Ity_I32
),
3178 IRExpr_Get( OFFB_DFLAG
, Ity_I32
) );
3183 void dis_string_op( void (*dis_OP
)( Int
, IRTemp
),
3184 Int sz
, const HChar
* name
, UChar sorb
)
3186 IRTemp t_inc
= newTemp(Ity_I32
);
3187 vassert(sorb
== 0); /* hmm. so what was the point of passing it in? */
3188 dis_string_op_increment(sz
, t_inc
);
3189 dis_OP( sz
, t_inc
);
3190 DIP("%s%c\n", name
, nameISize(sz
));
3194 void dis_MOVS ( Int sz
, IRTemp t_inc
)
3196 IRType ty
= szToITy(sz
);
3197 IRTemp td
= newTemp(Ity_I32
); /* EDI */
3198 IRTemp ts
= newTemp(Ity_I32
); /* ESI */
3200 assign( td
, getIReg(4, R_EDI
) );
3201 assign( ts
, getIReg(4, R_ESI
) );
3203 storeLE( mkexpr(td
), loadLE(ty
,mkexpr(ts
)) );
3205 putIReg( 4, R_EDI
, binop(Iop_Add32
, mkexpr(td
), mkexpr(t_inc
)) );
3206 putIReg( 4, R_ESI
, binop(Iop_Add32
, mkexpr(ts
), mkexpr(t_inc
)) );
3210 void dis_LODS ( Int sz
, IRTemp t_inc
)
3212 IRType ty
= szToITy(sz
);
3213 IRTemp ts
= newTemp(Ity_I32
); /* ESI */
3215 assign( ts
, getIReg(4, R_ESI
) );
3217 putIReg( sz
, R_EAX
, loadLE(ty
, mkexpr(ts
)) );
3219 putIReg( 4, R_ESI
, binop(Iop_Add32
, mkexpr(ts
), mkexpr(t_inc
)) );
3223 void dis_STOS ( Int sz
, IRTemp t_inc
)
3225 IRType ty
= szToITy(sz
);
3226 IRTemp ta
= newTemp(ty
); /* EAX */
3227 IRTemp td
= newTemp(Ity_I32
); /* EDI */
3229 assign( ta
, getIReg(sz
, R_EAX
) );
3230 assign( td
, getIReg(4, R_EDI
) );
3232 storeLE( mkexpr(td
), mkexpr(ta
) );
3234 putIReg( 4, R_EDI
, binop(Iop_Add32
, mkexpr(td
), mkexpr(t_inc
)) );
3238 void dis_CMPS ( Int sz
, IRTemp t_inc
)
3240 IRType ty
= szToITy(sz
);
3241 IRTemp tdv
= newTemp(ty
); /* (EDI) */
3242 IRTemp tsv
= newTemp(ty
); /* (ESI) */
3243 IRTemp td
= newTemp(Ity_I32
); /* EDI */
3244 IRTemp ts
= newTemp(Ity_I32
); /* ESI */
3246 assign( td
, getIReg(4, R_EDI
) );
3247 assign( ts
, getIReg(4, R_ESI
) );
3249 assign( tdv
, loadLE(ty
,mkexpr(td
)) );
3250 assign( tsv
, loadLE(ty
,mkexpr(ts
)) );
3252 setFlags_DEP1_DEP2 ( Iop_Sub8
, tsv
, tdv
, ty
);
3254 putIReg(4, R_EDI
, binop(Iop_Add32
, mkexpr(td
), mkexpr(t_inc
)) );
3255 putIReg(4, R_ESI
, binop(Iop_Add32
, mkexpr(ts
), mkexpr(t_inc
)) );
3259 void dis_SCAS ( Int sz
, IRTemp t_inc
)
3261 IRType ty
= szToITy(sz
);
3262 IRTemp ta
= newTemp(ty
); /* EAX */
3263 IRTemp td
= newTemp(Ity_I32
); /* EDI */
3264 IRTemp tdv
= newTemp(ty
); /* (EDI) */
3266 assign( ta
, getIReg(sz
, R_EAX
) );
3267 assign( td
, getIReg(4, R_EDI
) );
3269 assign( tdv
, loadLE(ty
,mkexpr(td
)) );
3270 setFlags_DEP1_DEP2 ( Iop_Sub8
, ta
, tdv
, ty
);
3272 putIReg(4, R_EDI
, binop(Iop_Add32
, mkexpr(td
), mkexpr(t_inc
)) );
3276 /* Wrap the appropriate string op inside a REP/REPE/REPNE.
3277 We assume the insn is the last one in the basic block, and so emit a jump
3278 to the next insn, rather than just falling through. */
3280 void dis_REP_op ( /*MOD*/DisResult
* dres
,
3282 void (*dis_OP
)(Int
, IRTemp
),
3283 Int sz
, Addr32 eip
, Addr32 eip_next
, const HChar
* name
)
3285 IRTemp t_inc
= newTemp(Ity_I32
);
3286 IRTemp tc
= newTemp(Ity_I32
); /* ECX */
3288 assign( tc
, getIReg(4,R_ECX
) );
3290 stmt( IRStmt_Exit( binop(Iop_CmpEQ32
,mkexpr(tc
),mkU32(0)),
3292 IRConst_U32(eip_next
), OFFB_EIP
) );
3294 putIReg(4, R_ECX
, binop(Iop_Sub32
, mkexpr(tc
), mkU32(1)) );
3296 dis_string_op_increment(sz
, t_inc
);
3299 if (cond
== X86CondAlways
) {
3300 jmp_lit(dres
, Ijk_Boring
, eip
);
3301 vassert(dres
->whatNext
== Dis_StopHere
);
3303 stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond
),
3305 IRConst_U32(eip
), OFFB_EIP
) );
3306 jmp_lit(dres
, Ijk_Boring
, eip_next
);
3307 vassert(dres
->whatNext
== Dis_StopHere
);
3309 DIP("%s%c\n", name
, nameISize(sz
));
3313 /*------------------------------------------------------------*/
3314 /*--- Arithmetic, etc. ---*/
3315 /*------------------------------------------------------------*/
3317 /* IMUL E, G. Supplied eip points to the modR/M byte. */
3319 UInt
dis_mul_E_G ( UChar sorb
,
3325 UChar rm
= getIByte(delta0
);
3326 IRType ty
= szToITy(size
);
3327 IRTemp te
= newTemp(ty
);
3328 IRTemp tg
= newTemp(ty
);
3329 IRTemp resLo
= newTemp(ty
);
3331 assign( tg
, getIReg(size
, gregOfRM(rm
)) );
3332 if (epartIsReg(rm
)) {
3333 assign( te
, getIReg(size
, eregOfRM(rm
)) );
3335 IRTemp addr
= disAMode( &alen
, sorb
, delta0
, dis_buf
);
3336 assign( te
, loadLE(ty
,mkexpr(addr
)) );
3339 setFlags_MUL ( ty
, te
, tg
, X86G_CC_OP_SMULB
);
3341 assign( resLo
, binop( mkSizedOp(ty
, Iop_Mul8
), mkexpr(te
), mkexpr(tg
) ) );
3343 putIReg(size
, gregOfRM(rm
), mkexpr(resLo
) );
3345 if (epartIsReg(rm
)) {
3346 DIP("imul%c %s, %s\n", nameISize(size
),
3347 nameIReg(size
,eregOfRM(rm
)),
3348 nameIReg(size
,gregOfRM(rm
)));
3351 DIP("imul%c %s, %s\n", nameISize(size
),
3352 dis_buf
, nameIReg(size
,gregOfRM(rm
)));
3358 /* IMUL I * E -> G. Supplied eip points to the modR/M byte. */
3360 UInt
dis_imul_I_E_G ( UChar sorb
,
3367 UChar rm
= getIByte(delta
);
3368 IRType ty
= szToITy(size
);
3369 IRTemp te
= newTemp(ty
);
3370 IRTemp tl
= newTemp(ty
);
3371 IRTemp resLo
= newTemp(ty
);
3373 vassert(size
== 1 || size
== 2 || size
== 4);
3375 if (epartIsReg(rm
)) {
3376 assign(te
, getIReg(size
, eregOfRM(rm
)));
3379 IRTemp addr
= disAMode( &alen
, sorb
, delta
, dis_buf
);
3380 assign(te
, loadLE(ty
, mkexpr(addr
)));
3383 d32
= getSDisp(litsize
,delta
);
3386 if (size
== 1) d32
&= 0xFF;
3387 if (size
== 2) d32
&= 0xFFFF;
3389 assign(tl
, mkU(ty
,d32
));
3391 assign( resLo
, binop( mkSizedOp(ty
, Iop_Mul8
), mkexpr(te
), mkexpr(tl
) ));
3393 setFlags_MUL ( ty
, te
, tl
, X86G_CC_OP_SMULB
);
3395 putIReg(size
, gregOfRM(rm
), mkexpr(resLo
));
3397 DIP("imul %d, %s, %s\n", d32
,
3398 ( epartIsReg(rm
) ? nameIReg(size
,eregOfRM(rm
)) : dis_buf
),
3399 nameIReg(size
,gregOfRM(rm
)) );
3404 /* Generate an IR sequence to do a count-leading-zeroes operation on
3405 the supplied IRTemp, and return a new IRTemp holding the result.
3406 'ty' may be Ity_I16 or Ity_I32 only. In the case where the
3407 argument is zero, return the number of bits in the word (the
3408 natural semantics). */
3409 static IRTemp
gen_LZCNT ( IRType ty
, IRTemp src
)
3411 vassert(ty
== Ity_I32
|| ty
== Ity_I16
);
3413 IRTemp src32
= newTemp(Ity_I32
);
3414 assign(src32
, widenUto32( mkexpr(src
) ));
3416 IRTemp src32x
= newTemp(Ity_I32
);
3418 binop(Iop_Shl32
, mkexpr(src32
),
3419 mkU8(32 - 8 * sizeofIRType(ty
))));
3421 // Clz32 has undefined semantics when its input is zero, so
3422 // special-case around that.
3423 IRTemp res32
= newTemp(Ity_I32
);
3426 binop(Iop_CmpEQ32
, mkexpr(src32x
), mkU32(0)),
3427 mkU32(8 * sizeofIRType(ty
)),
3428 unop(Iop_Clz32
, mkexpr(src32x
))
3431 IRTemp res
= newTemp(ty
);
3432 assign(res
, narrowTo(ty
, mkexpr(res32
)));
3437 /*------------------------------------------------------------*/
3439 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
3441 /*------------------------------------------------------------*/
3443 /* --- Helper functions for dealing with the register stack. --- */
3445 /* --- Set the emulation-warning pseudo-register. --- */
/* --- Set the emulation-warning pseudo-register. --- */
/* Writes 'e' to the EMNOTE guest-state field; 'e' must be :: Ity_I32. */
static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_EMNOTE, e ) );
}
3453 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
/* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
static IRExpr* mkQNaN64 ( void )
{
   /* QNaN is 0 2047 1 0(51times)
      == 0b 11111111111b 1 0(51times)
      == 0x7FF8 0000 0000 0000
   */
   return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
}
3464 /* --------- Get/put the top-of-stack pointer. --------- */
/* --------- Get/put the top-of-stack pointer. --------- */
/* Read the x87 FTOP (top-of-stack index) field as an Ity_I32. */
static IRExpr* get_ftop ( void )
{
   return IRExpr_Get( OFFB_FTOP, Ity_I32 );
}
/* Write 'e' (which must be :: Ity_I32) to the x87 FTOP field. */
static void put_ftop ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put( OFFB_FTOP, e ) );
}
3477 /* --------- Get/put the C3210 bits. --------- */
/* --------- Get/put the C3210 bits. --------- */
/* Read the FPU C3..C0 condition-code bits as an Ity_I32. */
static IRExpr* get_C3210 ( void )
{
   return IRExpr_Get( OFFB_FC3210, Ity_I32 );
}
3484 static void put_C3210 ( IRExpr
* e
)
3486 stmt( IRStmt_Put( OFFB_FC3210
, e
) );
3489 /* --------- Get/put the FPU rounding mode. --------- */
3490 static IRExpr
* /* :: Ity_I32 */ get_fpround ( void )
3492 return IRExpr_Get( OFFB_FPROUND
, Ity_I32
);
3495 static void put_fpround ( IRExpr
* /* :: Ity_I32 */ e
)
3497 stmt( IRStmt_Put( OFFB_FPROUND
, e
) );
3501 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
3502 /* Produces a value in 0 .. 3, which is encoded as per the type
3503 IRRoundingMode. Since the guest_FPROUND value is also encoded as
3504 per IRRoundingMode, we merely need to get it and mask it for
/* --------- Synthesise a 2-bit FPU rounding mode. --------- */
/* Produces a value in 0 .. 3, which is encoded as per the type
   IRRoundingMode.  Since the guest_FPROUND value is also encoded as
   per IRRoundingMode, we merely need to get it and mask it for
   safety. */
static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
{
   return binop( Iop_And32, get_fpround(), mkU32(3) );
}
/* A fixed round-to-nearest rounding mode, used for operations where
   the guest rounding mode is deliberately not observed. */
static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
{
   return mkU32(Irrm_NEAREST);
}
3518 /* --------- Get/set FP register tag bytes. --------- */
3520 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
3522 static void put_ST_TAG ( Int i
, IRExpr
* value
)
3525 vassert(typeOfIRExpr(irsb
->tyenv
, value
) == Ity_I8
);
3526 descr
= mkIRRegArray( OFFB_FPTAGS
, Ity_I8
, 8 );
3527 stmt( IRStmt_PutI( mkIRPutI(descr
, get_ftop(), i
, value
) ) );
3530 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
3531 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
/* Given i, generate an expression yielding 'ST_TAG(i)'.  This will be
   zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
static IRExpr* get_ST_TAG ( Int i )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   return IRExpr_GetI( descr, get_ftop(), i );
}
3540 /* --------- Get/set FP registers. --------- */
3542 /* Given i, and some expression e, emit 'ST(i) = e' and set the
3543 register's tag to indicate the register is full. The previous
3544 state of the register is not checked. */
3546 static void put_ST_UNCHECKED ( Int i
, IRExpr
* value
)
3549 vassert(typeOfIRExpr(irsb
->tyenv
, value
) == Ity_F64
);
3550 descr
= mkIRRegArray( OFFB_FPREGS
, Ity_F64
, 8 );
3551 stmt( IRStmt_PutI( mkIRPutI(descr
, get_ftop(), i
, value
) ) );
3552 /* Mark the register as in-use. */
3553 put_ST_TAG(i
, mkU8(1));
3556 /* Given i, and some expression e, emit
3557 ST(i) = is_full(i) ? NaN : e
3558 and set the tag accordingly.
3561 static void put_ST ( Int i
, IRExpr
* value
)
3565 IRExpr_ITE( binop(Iop_CmpNE8
, get_ST_TAG(i
), mkU8(0)),
3566 /* non-0 means full */
3575 /* Given i, generate an expression yielding 'ST(i)'. */
3577 static IRExpr
* get_ST_UNCHECKED ( Int i
)
3579 IRRegArray
* descr
= mkIRRegArray( OFFB_FPREGS
, Ity_F64
, 8 );
3580 return IRExpr_GetI( descr
, get_ftop(), i
);
3584 /* Given i, generate an expression yielding
3585 is_full(i) ? ST(i) : NaN
3588 static IRExpr
* get_ST ( Int i
)
3591 IRExpr_ITE( binop(Iop_CmpNE8
, get_ST_TAG(i
), mkU8(0)),
3592 /* non-0 means full */
3593 get_ST_UNCHECKED(i
),
3599 /* Given i, and some expression e, and a condition cond, generate IR
3600 which has the same effect as put_ST(i,e) when cond is true and has
3601 no effect when cond is false. Given the lack of proper
3602 if-then-else in the IR, this is pretty tricky.
3605 static void maybe_put_ST ( IRTemp cond
, Int i
, IRExpr
* value
)
3607 // new_tag = if cond then FULL else old_tag
3608 // new_val = if cond then (if old_tag==FULL then NaN else val)
3611 IRTemp old_tag
= newTemp(Ity_I8
);
3612 assign(old_tag
, get_ST_TAG(i
));
3613 IRTemp new_tag
= newTemp(Ity_I8
);
3615 IRExpr_ITE(mkexpr(cond
), mkU8(1)/*FULL*/, mkexpr(old_tag
)));
3617 IRTemp old_val
= newTemp(Ity_F64
);
3618 assign(old_val
, get_ST_UNCHECKED(i
));
3619 IRTemp new_val
= newTemp(Ity_F64
);
3621 IRExpr_ITE(mkexpr(cond
),
3622 IRExpr_ITE(binop(Iop_CmpNE8
, mkexpr(old_tag
), mkU8(0)),
3623 /* non-0 means full */
3629 put_ST_UNCHECKED(i
, mkexpr(new_val
));
3630 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
3631 // now set it to new_tag instead.
3632 put_ST_TAG(i
, mkexpr(new_tag
));
3635 /* Adjust FTOP downwards by one register. */
3637 static void fp_push ( void )
3639 put_ftop( binop(Iop_Sub32
, get_ftop(), mkU32(1)) );
3642 /* Adjust FTOP downwards by one register when COND is 1:I1. Else
3645 static void maybe_fp_push ( IRTemp cond
)
3647 put_ftop( binop(Iop_Sub32
, get_ftop(), unop(Iop_1Uto32
,mkexpr(cond
))) );
3650 /* Adjust FTOP upwards by one register, and mark the vacated register
3653 static void fp_pop ( void )
3655 put_ST_TAG(0, mkU8(0));
3656 put_ftop( binop(Iop_Add32
, get_ftop(), mkU32(1)) );
3659 /* Set the C2 bit of the FPU status register to e[0]. Assumes that
3662 static void set_C2 ( IRExpr
* e
)
3664 IRExpr
* cleared
= binop(Iop_And32
, get_C3210(), mkU32(~X86G_FC_MASK_C2
));
3665 put_C3210( binop(Iop_Or32
,
3667 binop(Iop_Shl32
, e
, mkU8(X86G_FC_SHIFT_C2
))) );
3670 /* Generate code to check that abs(d64) < 2^63 and is finite. This is
3671 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
3672 test is simple, but the derivation of it is not so simple.
3674 The exponent field for an IEEE754 double is 11 bits. That means it
3675 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
3676 the number is either a NaN or an Infinity and so is not finite.
3677 Furthermore, a finite value of exactly 2^63 is the smallest value
3678 that has exponent value 0x43E. Hence, what we need to do is
3679 extract the exponent, ignoring the sign bit and mantissa, and check
3680 it is < 0x43E, or <= 0x43D.
3682 To make this easily applicable to 32- and 64-bit targets, a
3683 roundabout approach is used. First the number is converted to I64,
3684 then the top 32 bits are taken. Shifting them right by 20 bits
3685 places the sign bit and exponent in the bottom 12 bits. Anding
3686 with 0x7FF gets rid of the sign bit, leaving just the exponent
3687 available for comparison.
3689 static IRTemp
math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64
)
3691 IRTemp i64
= newTemp(Ity_I64
);
3692 assign(i64
, unop(Iop_ReinterpF64asI64
, mkexpr(d64
)) );
3693 IRTemp exponent
= newTemp(Ity_I32
);
3696 binop(Iop_Shr32
, unop(Iop_64HIto32
, mkexpr(i64
)), mkU8(20)),
3698 IRTemp in_range_and_finite
= newTemp(Ity_I1
);
3699 assign(in_range_and_finite
,
3700 binop(Iop_CmpLE32U
, mkexpr(exponent
), mkU32(0x43D)));
3701 return in_range_and_finite
;
3704 /* Invent a plausible-looking FPU status word value:
3705 ((ftop & 7) << 11) | (c3210 & 0x4700)
3707 static IRExpr
* get_FPU_sw ( void )
3713 binop(Iop_And32
, get_ftop(), mkU32(7)),
3715 binop(Iop_And32
, get_C3210(), mkU32(0x4700))
3720 /* ------------------------------------------------------- */
3721 /* Given all that stack-mangling junk, we can now go ahead
3722 and describe FP instructions.
3725 /* ST(0) = ST(0) `op` mem64/32(addr)
3726 Need to check ST(0)'s tag on read, but not on write.
3729 void fp_do_op_mem_ST_0 ( IRTemp addr
, const HChar
* op_txt
, HChar
* dis_buf
,
3732 DIP("f%s%c %s\n", op_txt
, dbl
?'l':'s', dis_buf
);
3736 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3738 loadLE(Ity_F64
,mkexpr(addr
))
3743 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3745 unop(Iop_F32toF64
, loadLE(Ity_F32
,mkexpr(addr
)))
3751 /* ST(0) = mem64/32(addr) `op` ST(0)
3752 Need to check ST(0)'s tag on read, but not on write.
3755 void fp_do_oprev_mem_ST_0 ( IRTemp addr
, const HChar
* op_txt
, HChar
* dis_buf
,
3758 DIP("f%s%c %s\n", op_txt
, dbl
?'l':'s', dis_buf
);
3762 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3763 loadLE(Ity_F64
,mkexpr(addr
)),
3769 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3770 unop(Iop_F32toF64
, loadLE(Ity_F32
,mkexpr(addr
))),
3777 /* ST(dst) = ST(dst) `op` ST(src).
3778 Check dst and src tags when reading but not on write.
3781 void fp_do_op_ST_ST ( const HChar
* op_txt
, IROp op
, UInt st_src
, UInt st_dst
,
3784 DIP("f%s%s st(%u), st(%u)\n", op_txt
, pop_after
?"p":"",
3789 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3797 /* ST(dst) = ST(src) `op` ST(dst).
3798 Check dst and src tags when reading but not on write.
3801 void fp_do_oprev_ST_ST ( const HChar
* op_txt
, IROp op
, UInt st_src
,
3802 UInt st_dst
, Bool pop_after
)
3804 DIP("f%s%s st(%u), st(%u)\n", op_txt
, pop_after
?"p":"",
3809 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
3817 /* %eflags(Z,P,C) = UCOMI( st(0), st(i) ) */
3818 static void fp_do_ucomi_ST0_STi ( UInt i
, Bool pop_after
)
3820 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after
? "p" : "", i
);
3821 /* This is a bit of a hack (and isn't really right). It sets
3822 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
3823 documentation implies A and S are unchanged.
3825 /* It's also fishy in that it is used both for COMIP and
3826 UCOMIP, and they aren't the same (although similar). */
3827 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
3828 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
3829 stmt( IRStmt_Put( OFFB_CC_DEP1
,
3831 binop(Iop_CmpF64
, get_ST(0), get_ST(i
)),
3834 /* Set NDEP even though it isn't used. This makes redundant-PUT
3835 elimination of previous stores to this field work better. */
3836 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
3843 UInt
dis_FPU ( Bool
* decode_ok
, UChar sorb
, Int delta
)
3850 /* On entry, delta points at the second byte of the insn (the modrm
3852 UChar first_opcode
= getIByte(delta
-1);
3853 UChar modrm
= getIByte(delta
+0);
3855 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
3857 if (first_opcode
== 0xD8) {
3860 /* bits 5,4,3 are an opcode extension, and the modRM also
3861 specifies an address. */
3862 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
3865 switch (gregOfRM(modrm
)) {
3867 case 0: /* FADD single-real */
3868 fp_do_op_mem_ST_0 ( addr
, "add", dis_buf
, Iop_AddF64
, False
);
3871 case 1: /* FMUL single-real */
3872 fp_do_op_mem_ST_0 ( addr
, "mul", dis_buf
, Iop_MulF64
, False
);
3875 case 2: /* FCOM single-real */
3876 DIP("fcoms %s\n", dis_buf
);
3877 /* This forces C1 to zero, which isn't right. */
3884 loadLE(Ity_F32
,mkexpr(addr
)))),
3890 case 3: /* FCOMP single-real */
3891 DIP("fcomps %s\n", dis_buf
);
3892 /* This forces C1 to zero, which isn't right. */
3899 loadLE(Ity_F32
,mkexpr(addr
)))),
3906 case 4: /* FSUB single-real */
3907 fp_do_op_mem_ST_0 ( addr
, "sub", dis_buf
, Iop_SubF64
, False
);
3910 case 5: /* FSUBR single-real */
3911 fp_do_oprev_mem_ST_0 ( addr
, "subr", dis_buf
, Iop_SubF64
, False
);
3914 case 6: /* FDIV single-real */
3915 fp_do_op_mem_ST_0 ( addr
, "div", dis_buf
, Iop_DivF64
, False
);
3918 case 7: /* FDIVR single-real */
3919 fp_do_oprev_mem_ST_0 ( addr
, "divr", dis_buf
, Iop_DivF64
, False
);
3923 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
3924 vex_printf("first_opcode == 0xD8\n");
3931 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
3932 fp_do_op_ST_ST ( "add", Iop_AddF64
, modrm
- 0xC0, 0, False
);
3935 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
3936 fp_do_op_ST_ST ( "mul", Iop_MulF64
, modrm
- 0xC8, 0, False
);
3939 /* Dunno if this is right */
3940 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
3941 r_dst
= (UInt
)modrm
- 0xD0;
3942 DIP("fcom %%st(0),%%st(%u)\n", r_dst
);
3943 /* This forces C1 to zero, which isn't right. */
3947 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
3953 /* Dunno if this is right */
3954 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
3955 r_dst
= (UInt
)modrm
- 0xD8;
3956 DIP("fcomp %%st(0),%%st(%u)\n", r_dst
);
3957 /* This forces C1 to zero, which isn't right. */
3961 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
3968 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
3969 fp_do_op_ST_ST ( "sub", Iop_SubF64
, modrm
- 0xE0, 0, False
);
3972 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
3973 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, modrm
- 0xE8, 0, False
);
3976 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
3977 fp_do_op_ST_ST ( "div", Iop_DivF64
, modrm
- 0xF0, 0, False
);
3980 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
3981 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, modrm
- 0xF8, 0, False
);
3990 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
3992 if (first_opcode
== 0xD9) {
3995 /* bits 5,4,3 are an opcode extension, and the modRM also
3996 specifies an address. */
3997 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
4000 switch (gregOfRM(modrm
)) {
4002 case 0: /* FLD single-real */
4003 DIP("flds %s\n", dis_buf
);
4005 put_ST(0, unop(Iop_F32toF64
,
4006 loadLE(Ity_F32
, mkexpr(addr
))));
4009 case 2: /* FST single-real */
4010 DIP("fsts %s\n", dis_buf
);
4011 storeLE(mkexpr(addr
),
4012 binop(Iop_F64toF32
, get_roundingmode(), get_ST(0)));
4015 case 3: /* FSTP single-real */
4016 DIP("fstps %s\n", dis_buf
);
4017 storeLE(mkexpr(addr
),
4018 binop(Iop_F64toF32
, get_roundingmode(), get_ST(0)));
4022 case 4: { /* FLDENV m28 */
4023 /* Uses dirty helper:
4024 VexEmNote x86g_do_FLDENV ( VexGuestX86State*, HWord ) */
4025 IRTemp ew
= newTemp(Ity_I32
);
4026 IRDirty
* d
= unsafeIRDirty_0_N (
4028 "x86g_dirtyhelper_FLDENV",
4029 &x86g_dirtyhelper_FLDENV
,
4030 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
4033 /* declare we're reading memory */
4035 d
->mAddr
= mkexpr(addr
);
4038 /* declare we're writing guest state */
4040 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
4042 d
->fxState
[0].fx
= Ifx_Write
;
4043 d
->fxState
[0].offset
= OFFB_FTOP
;
4044 d
->fxState
[0].size
= sizeof(UInt
);
4046 d
->fxState
[1].fx
= Ifx_Write
;
4047 d
->fxState
[1].offset
= OFFB_FPTAGS
;
4048 d
->fxState
[1].size
= 8 * sizeof(UChar
);
4050 d
->fxState
[2].fx
= Ifx_Write
;
4051 d
->fxState
[2].offset
= OFFB_FPROUND
;
4052 d
->fxState
[2].size
= sizeof(UInt
);
4054 d
->fxState
[3].fx
= Ifx_Write
;
4055 d
->fxState
[3].offset
= OFFB_FC3210
;
4056 d
->fxState
[3].size
= sizeof(UInt
);
4058 stmt( IRStmt_Dirty(d
) );
4060 /* ew contains any emulation warning we may need to
4061 issue. If needed, side-exit to the next insn,
4062 reporting the warning, so that Valgrind's dispatcher
4063 sees the warning. */
4064 put_emwarn( mkexpr(ew
) );
4067 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
4069 IRConst_U32( ((Addr32
)guest_EIP_bbstart
)+delta
),
4074 DIP("fldenv %s\n", dis_buf
);
4078 case 5: {/* FLDCW */
4079 /* The only thing we observe in the control word is the
4080 rounding mode. Therefore, pass the 16-bit value
4081 (x87 native-format control word) to a clean helper,
4082 getting back a 64-bit value, the lower half of which
4083 is the FPROUND value to store, and the upper half of
4084 which is the emulation-warning token which may be
4087 /* ULong x86h_check_fldcw ( UInt ); */
4088 IRTemp t64
= newTemp(Ity_I64
);
4089 IRTemp ew
= newTemp(Ity_I32
);
4090 DIP("fldcw %s\n", dis_buf
);
4091 assign( t64
, mkIRExprCCall(
4092 Ity_I64
, 0/*regparms*/,
4097 loadLE(Ity_I16
, mkexpr(addr
)))
4102 put_fpround( unop(Iop_64to32
, mkexpr(t64
)) );
4103 assign( ew
, unop(Iop_64HIto32
, mkexpr(t64
) ) );
4104 put_emwarn( mkexpr(ew
) );
4105 /* Finally, if an emulation warning was reported,
4106 side-exit to the next insn, reporting the warning,
4107 so that Valgrind's dispatcher sees the warning. */
4110 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
4112 IRConst_U32( ((Addr32
)guest_EIP_bbstart
)+delta
),
4119 case 6: { /* FNSTENV m28 */
4120 /* Uses dirty helper:
4121 void x86g_do_FSTENV ( VexGuestX86State*, HWord ) */
4122 IRDirty
* d
= unsafeIRDirty_0_N (
4124 "x86g_dirtyhelper_FSTENV",
4125 &x86g_dirtyhelper_FSTENV
,
4126 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
4128 /* declare we're writing memory */
4130 d
->mAddr
= mkexpr(addr
);
4133 /* declare we're reading guest state */
4135 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
4137 d
->fxState
[0].fx
= Ifx_Read
;
4138 d
->fxState
[0].offset
= OFFB_FTOP
;
4139 d
->fxState
[0].size
= sizeof(UInt
);
4141 d
->fxState
[1].fx
= Ifx_Read
;
4142 d
->fxState
[1].offset
= OFFB_FPTAGS
;
4143 d
->fxState
[1].size
= 8 * sizeof(UChar
);
4145 d
->fxState
[2].fx
= Ifx_Read
;
4146 d
->fxState
[2].offset
= OFFB_FPROUND
;
4147 d
->fxState
[2].size
= sizeof(UInt
);
4149 d
->fxState
[3].fx
= Ifx_Read
;
4150 d
->fxState
[3].offset
= OFFB_FC3210
;
4151 d
->fxState
[3].size
= sizeof(UInt
);
4153 stmt( IRStmt_Dirty(d
) );
4155 DIP("fnstenv %s\n", dis_buf
);
4159 case 7: /* FNSTCW */
4160 /* Fake up a native x87 FPU control word. The only
4161 thing it depends on is FPROUND[1:0], so call a clean
4162 helper to cook it up. */
4163 /* UInt x86h_create_fpucw ( UInt fpround ) */
4164 DIP("fnstcw %s\n", dis_buf
);
4170 "x86g_create_fpucw", &x86g_create_fpucw
,
4171 mkIRExprVec_1( get_fpround() )
4178 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
4179 vex_printf("first_opcode == 0xD9\n");
4187 case 0xC0 ... 0xC7: /* FLD %st(?) */
4188 r_src
= (UInt
)modrm
- 0xC0;
4189 DIP("fld %%st(%u)\n", r_src
);
4190 t1
= newTemp(Ity_F64
);
4191 assign(t1
, get_ST(r_src
));
4193 put_ST(0, mkexpr(t1
));
4196 case 0xC8 ... 0xCF: /* FXCH %st(?) */
4197 r_src
= (UInt
)modrm
- 0xC8;
4198 DIP("fxch %%st(%u)\n", r_src
);
4199 t1
= newTemp(Ity_F64
);
4200 t2
= newTemp(Ity_F64
);
4201 assign(t1
, get_ST(0));
4202 assign(t2
, get_ST(r_src
));
4203 put_ST_UNCHECKED(0, mkexpr(t2
));
4204 put_ST_UNCHECKED(r_src
, mkexpr(t1
));
4207 case 0xE0: /* FCHS */
4209 put_ST_UNCHECKED(0, unop(Iop_NegF64
, get_ST(0)));
4212 case 0xE1: /* FABS */
4214 put_ST_UNCHECKED(0, unop(Iop_AbsF64
, get_ST(0)));
4217 case 0xE4: /* FTST */
4219 /* This forces C1 to zero, which isn't right. */
4220 /* Well, in fact the Intel docs say (bizarrely): "C1 is
4221 set to 0 if stack underflow occurred; otherwise, set
4222 to 0" which is pretty nonsensical. I guess it's a
4229 IRExpr_Const(IRConst_F64i(0x0ULL
))),
4235 case 0xE5: { /* FXAM */
4236 /* This is an interesting one. It examines %st(0),
4237 regardless of whether the tag says it's empty or not.
4238 Here, just pass both the tag (in our format) and the
4239 value (as a double, actually a ULong) to a helper
4242 = mkIRExprVec_2( unop(Iop_8Uto32
, get_ST_TAG(0)),
4243 unop(Iop_ReinterpF64asI64
,
4244 get_ST_UNCHECKED(0)) );
4245 put_C3210(mkIRExprCCall(
4248 "x86g_calculate_FXAM", &x86g_calculate_FXAM
,
4255 case 0xE8: /* FLD1 */
4258 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
4259 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL
)));
4262 case 0xE9: /* FLDL2T */
4265 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
4266 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL
)));
4269 case 0xEA: /* FLDL2E */
4272 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
4273 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL
)));
4276 case 0xEB: /* FLDPI */
4279 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
4280 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL
)));
4283 case 0xEC: /* FLDLG2 */
4286 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
4287 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL
)));
4290 case 0xED: /* FLDLN2 */
4293 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
4294 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL
)));
4297 case 0xEE: /* FLDZ */
4300 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
4301 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL
)));
4304 case 0xF0: /* F2XM1 */
4308 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4312 case 0xF1: /* FYL2X */
4316 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4322 case 0xF2: { /* FPTAN */
4324 IRTemp argD
= newTemp(Ity_F64
);
4325 assign(argD
, get_ST(0));
4326 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
4327 IRTemp resD
= newTemp(Ity_F64
);
4332 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4336 put_ST_UNCHECKED(0, mkexpr(resD
));
4337 /* Conditionally push 1.0 on the stack, if the arg is
4339 maybe_fp_push(argOK
);
4340 maybe_put_ST(argOK
, 0,
4341 IRExpr_Const(IRConst_F64(1.0)));
4342 set_C2( binop(Iop_Xor32
,
4343 unop(Iop_1Uto32
, mkexpr(argOK
)),
4348 case 0xF3: /* FPATAN */
4352 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4358 case 0xF4: { /* FXTRACT */
4359 IRTemp argF
= newTemp(Ity_F64
);
4360 IRTemp sigF
= newTemp(Ity_F64
);
4361 IRTemp expF
= newTemp(Ity_F64
);
4362 IRTemp argI
= newTemp(Ity_I64
);
4363 IRTemp sigI
= newTemp(Ity_I64
);
4364 IRTemp expI
= newTemp(Ity_I64
);
4366 assign( argF
, get_ST(0) );
4367 assign( argI
, unop(Iop_ReinterpF64asI64
, mkexpr(argF
)));
4370 Ity_I64
, 0/*regparms*/,
4371 "x86amd64g_calculate_FXTRACT",
4372 &x86amd64g_calculate_FXTRACT
,
4373 mkIRExprVec_2( mkexpr(argI
),
4374 mkIRExpr_HWord(0)/*sig*/ ))
4378 Ity_I64
, 0/*regparms*/,
4379 "x86amd64g_calculate_FXTRACT",
4380 &x86amd64g_calculate_FXTRACT
,
4381 mkIRExprVec_2( mkexpr(argI
),
4382 mkIRExpr_HWord(1)/*exp*/ ))
4384 assign( sigF
, unop(Iop_ReinterpI64asF64
, mkexpr(sigI
)) );
4385 assign( expF
, unop(Iop_ReinterpI64asF64
, mkexpr(expI
)) );
4387 put_ST_UNCHECKED(0, mkexpr(expF
) );
4390 put_ST(0, mkexpr(sigF
) );
4394 case 0xF5: { /* FPREM1 -- IEEE compliant */
4395 IRTemp a1
= newTemp(Ity_F64
);
4396 IRTemp a2
= newTemp(Ity_F64
);
4398 /* Do FPREM1 twice, once to get the remainder, and once
4399 to get the C3210 flag values. */
4400 assign( a1
, get_ST(0) );
4401 assign( a2
, get_ST(1) );
4404 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4408 triop(Iop_PRem1C3210F64
,
4409 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4415 case 0xF7: /* FINCSTP */
4417 put_ftop( binop(Iop_Add32
, get_ftop(), mkU32(1)) );
4420 case 0xF8: { /* FPREM -- not IEEE compliant */
4421 IRTemp a1
= newTemp(Ity_F64
);
4422 IRTemp a2
= newTemp(Ity_F64
);
4424 /* Do FPREM twice, once to get the remainder, and once
4425 to get the C3210 flag values. */
4426 assign( a1
, get_ST(0) );
4427 assign( a2
, get_ST(1) );
4430 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4434 triop(Iop_PRemC3210F64
,
4435 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4441 case 0xF9: /* FYL2XP1 */
4444 triop(Iop_Yl2xp1F64
,
4445 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4451 case 0xFA: /* FSQRT */
4455 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4459 case 0xFB: { /* FSINCOS */
4461 IRTemp argD
= newTemp(Ity_F64
);
4462 assign(argD
, get_ST(0));
4463 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
4464 IRTemp resD
= newTemp(Ity_F64
);
4469 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4473 put_ST_UNCHECKED(0, mkexpr(resD
));
4474 /* Conditionally push the cos value on the stack, if
4475 the arg is in range */
4476 maybe_fp_push(argOK
);
4477 maybe_put_ST(argOK
, 0,
4479 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4481 set_C2( binop(Iop_Xor32
,
4482 unop(Iop_1Uto32
, mkexpr(argOK
)),
4487 case 0xFC: /* FRNDINT */
4490 binop(Iop_RoundF64toInt
, get_roundingmode(), get_ST(0)) );
4493 case 0xFD: /* FSCALE */
4497 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4502 case 0xFE: /* FSIN */
4503 case 0xFF: { /* FCOS */
4504 Bool isSIN
= modrm
== 0xFE;
4505 DIP("%s\n", isSIN
? "fsin" : "fcos");
4506 IRTemp argD
= newTemp(Ity_F64
);
4507 assign(argD
, get_ST(0));
4508 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
4509 IRTemp resD
= newTemp(Ity_F64
);
4513 binop(isSIN
? Iop_SinF64
: Iop_CosF64
,
4514 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4518 put_ST_UNCHECKED(0, mkexpr(resD
));
4519 set_C2( binop(Iop_Xor32
,
4520 unop(Iop_1Uto32
, mkexpr(argOK
)),
4531 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
4533 if (first_opcode
== 0xDA) {
4537 /* bits 5,4,3 are an opcode extension, and the modRM also
4538 specifies an address. */
4540 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
4542 switch (gregOfRM(modrm
)) {
4544 case 0: /* FIADD m32int */ /* ST(0) += m32int */
4545 DIP("fiaddl %s\n", dis_buf
);
4549 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
4550 DIP("fimull %s\n", dis_buf
);
4554 case 2: /* FICOM m32int */
4555 DIP("ficoml %s\n", dis_buf
);
4556 /* This forces C1 to zero, which isn't right. */
4563 loadLE(Ity_I32
,mkexpr(addr
)))),
4569 case 3: /* FICOMP m32int */
4570 DIP("ficompl %s\n", dis_buf
);
4571 /* This forces C1 to zero, which isn't right. */
4578 loadLE(Ity_I32
,mkexpr(addr
)))),
4585 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
4586 DIP("fisubl %s\n", dis_buf
);
4590 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
4591 DIP("fisubrl %s\n", dis_buf
);
4595 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
4596 DIP("fidivl %s\n", dis_buf
);
4600 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
4601 DIP("fidivrl %s\n", dis_buf
);
4608 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4611 loadLE(Ity_I32
, mkexpr(addr
)))));
4617 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
4619 loadLE(Ity_I32
, mkexpr(addr
))),
4624 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
4625 vex_printf("first_opcode == 0xDA\n");
4634 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
4635 r_src
= (UInt
)modrm
- 0xC0;
4636 DIP("fcmovb %%st(%u), %%st(0)\n", r_src
);
4639 mk_x86g_calculate_condition(X86CondB
),
4640 get_ST(r_src
), get_ST(0)) );
4643 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
4644 r_src
= (UInt
)modrm
- 0xC8;
4645 DIP("fcmovz %%st(%u), %%st(0)\n", r_src
);
4648 mk_x86g_calculate_condition(X86CondZ
),
4649 get_ST(r_src
), get_ST(0)) );
4652 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
4653 r_src
= (UInt
)modrm
- 0xD0;
4654 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src
);
4657 mk_x86g_calculate_condition(X86CondBE
),
4658 get_ST(r_src
), get_ST(0)) );
4661 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
4662 r_src
= (UInt
)modrm
- 0xD8;
4663 DIP("fcmovu %%st(%u), %%st(0)\n", r_src
);
4666 mk_x86g_calculate_condition(X86CondP
),
4667 get_ST(r_src
), get_ST(0)) );
4670 case 0xE9: /* FUCOMPP %st(0),%st(1) */
4671 DIP("fucompp %%st(0),%%st(1)\n");
4672 /* This forces C1 to zero, which isn't right. */
4676 binop(Iop_CmpF64
, get_ST(0), get_ST(1)),
4691 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
4693 if (first_opcode
== 0xDB) {
4696 /* bits 5,4,3 are an opcode extension, and the modRM also
4697 specifies an address. */
4698 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
4701 switch (gregOfRM(modrm
)) {
4703 case 0: /* FILD m32int */
4704 DIP("fildl %s\n", dis_buf
);
4706 put_ST(0, unop(Iop_I32StoF64
,
4707 loadLE(Ity_I32
, mkexpr(addr
))));
4710 case 1: /* FISTTPL m32 (SSE3) */
4711 DIP("fisttpl %s\n", dis_buf
);
4712 storeLE( mkexpr(addr
),
4713 binop(Iop_F64toI32S
, mkU32(Irrm_ZERO
), get_ST(0)) );
4717 case 2: /* FIST m32 */
4718 DIP("fistl %s\n", dis_buf
);
4719 storeLE( mkexpr(addr
),
4720 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) );
4723 case 3: /* FISTP m32 */
4724 DIP("fistpl %s\n", dis_buf
);
4725 storeLE( mkexpr(addr
),
4726 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) );
4730 case 5: { /* FLD extended-real */
4731 /* Uses dirty helper:
4732 ULong x86g_loadF80le ( UInt )
4733 addr holds the address. First, do a dirty call to
4734 get hold of the data. */
4735 IRTemp val
= newTemp(Ity_I64
);
4736 IRExpr
** args
= mkIRExprVec_1 ( mkexpr(addr
) );
4738 IRDirty
* d
= unsafeIRDirty_1_N (
4741 "x86g_dirtyhelper_loadF80le",
4742 &x86g_dirtyhelper_loadF80le
,
4745 /* declare that we're reading memory */
4747 d
->mAddr
= mkexpr(addr
);
4750 /* execute the dirty call, dumping the result in val. */
4751 stmt( IRStmt_Dirty(d
) );
4753 put_ST(0, unop(Iop_ReinterpI64asF64
, mkexpr(val
)));
4755 DIP("fldt %s\n", dis_buf
);
4759 case 7: { /* FSTP extended-real */
4760 /* Uses dirty helper: void x86g_storeF80le ( UInt, ULong ) */
4762 = mkIRExprVec_2( mkexpr(addr
),
4763 unop(Iop_ReinterpF64asI64
, get_ST(0)) );
4765 IRDirty
* d
= unsafeIRDirty_0_N (
4767 "x86g_dirtyhelper_storeF80le",
4768 &x86g_dirtyhelper_storeF80le
,
4771 /* declare we're writing memory */
4773 d
->mAddr
= mkexpr(addr
);
4776 /* execute the dirty call. */
4777 stmt( IRStmt_Dirty(d
) );
4780 DIP("fstpt\n %s", dis_buf
);
4785 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
4786 vex_printf("first_opcode == 0xDB\n");
4795 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
4796 r_src
= (UInt
)modrm
- 0xC0;
4797 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src
);
4800 mk_x86g_calculate_condition(X86CondNB
),
4801 get_ST(r_src
), get_ST(0)) );
4804 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
4805 r_src
= (UInt
)modrm
- 0xC8;
4806 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src
);
4809 mk_x86g_calculate_condition(X86CondNZ
),
4810 get_ST(r_src
), get_ST(0)) );
4813 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
4814 r_src
= (UInt
)modrm
- 0xD0;
4815 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src
);
4818 mk_x86g_calculate_condition(X86CondNBE
),
4819 get_ST(r_src
), get_ST(0)) );
4822 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
4823 r_src
= (UInt
)modrm
- 0xD8;
4824 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src
);
4827 mk_x86g_calculate_condition(X86CondNP
),
4828 get_ST(r_src
), get_ST(0)) );
4836 /* Uses dirty helper:
4837 void x86g_do_FINIT ( VexGuestX86State* ) */
4838 IRDirty
* d
= unsafeIRDirty_0_N (
4840 "x86g_dirtyhelper_FINIT",
4841 &x86g_dirtyhelper_FINIT
,
4842 mkIRExprVec_1(IRExpr_GSPTR())
4845 /* declare we're writing guest state */
4847 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
4849 d
->fxState
[0].fx
= Ifx_Write
;
4850 d
->fxState
[0].offset
= OFFB_FTOP
;
4851 d
->fxState
[0].size
= sizeof(UInt
);
4853 d
->fxState
[1].fx
= Ifx_Write
;
4854 d
->fxState
[1].offset
= OFFB_FPREGS
;
4855 d
->fxState
[1].size
= 8 * sizeof(ULong
);
4857 d
->fxState
[2].fx
= Ifx_Write
;
4858 d
->fxState
[2].offset
= OFFB_FPTAGS
;
4859 d
->fxState
[2].size
= 8 * sizeof(UChar
);
4861 d
->fxState
[3].fx
= Ifx_Write
;
4862 d
->fxState
[3].offset
= OFFB_FPROUND
;
4863 d
->fxState
[3].size
= sizeof(UInt
);
4865 d
->fxState
[4].fx
= Ifx_Write
;
4866 d
->fxState
[4].offset
= OFFB_FC3210
;
4867 d
->fxState
[4].size
= sizeof(UInt
);
4869 stmt( IRStmt_Dirty(d
) );
4875 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
4876 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xE8, False
);
4879 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
4880 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xF0, False
);
4889 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
4891 if (first_opcode
== 0xDC) {
4894 /* bits 5,4,3 are an opcode extension, and the modRM also
4895 specifies an address. */
4896 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
4899 switch (gregOfRM(modrm
)) {
4901 case 0: /* FADD double-real */
4902 fp_do_op_mem_ST_0 ( addr
, "add", dis_buf
, Iop_AddF64
, True
);
4905 case 1: /* FMUL double-real */
4906 fp_do_op_mem_ST_0 ( addr
, "mul", dis_buf
, Iop_MulF64
, True
);
4909 case 2: /* FCOM double-real */
4910 DIP("fcoml %s\n", dis_buf
);
4911 /* This forces C1 to zero, which isn't right. */
4917 loadLE(Ity_F64
,mkexpr(addr
))),
4923 case 3: /* FCOMP double-real */
4924 DIP("fcompl %s\n", dis_buf
);
4925 /* This forces C1 to zero, which isn't right. */
4931 loadLE(Ity_F64
,mkexpr(addr
))),
4938 case 4: /* FSUB double-real */
4939 fp_do_op_mem_ST_0 ( addr
, "sub", dis_buf
, Iop_SubF64
, True
);
4942 case 5: /* FSUBR double-real */
4943 fp_do_oprev_mem_ST_0 ( addr
, "subr", dis_buf
, Iop_SubF64
, True
);
4946 case 6: /* FDIV double-real */
4947 fp_do_op_mem_ST_0 ( addr
, "div", dis_buf
, Iop_DivF64
, True
);
4950 case 7: /* FDIVR double-real */
4951 fp_do_oprev_mem_ST_0 ( addr
, "divr", dis_buf
, Iop_DivF64
, True
);
4955 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
4956 vex_printf("first_opcode == 0xDC\n");
4965 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
4966 fp_do_op_ST_ST ( "add", Iop_AddF64
, 0, modrm
- 0xC0, False
);
4969 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
4970 fp_do_op_ST_ST ( "mul", Iop_MulF64
, 0, modrm
- 0xC8, False
);
4973 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
4974 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, 0, modrm
- 0xE0, False
);
4977 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
4978 fp_do_op_ST_ST ( "sub", Iop_SubF64
, 0, modrm
- 0xE8, False
);
4981 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
4982 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, 0, modrm
- 0xF0, False
);
4985 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
4986 fp_do_op_ST_ST ( "div", Iop_DivF64
, 0, modrm
- 0xF8, False
);
4996 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
4998 if (first_opcode
== 0xDD) {
5002 /* bits 5,4,3 are an opcode extension, and the modRM also
5003 specifies an address. */
5004 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5007 switch (gregOfRM(modrm
)) {
5009 case 0: /* FLD double-real */
5010 DIP("fldl %s\n", dis_buf
);
5012 put_ST(0, loadLE(Ity_F64
, mkexpr(addr
)));
5015 case 1: /* FISTTPQ m64 (SSE3) */
5016 DIP("fistppll %s\n", dis_buf
);
5017 storeLE( mkexpr(addr
),
5018 binop(Iop_F64toI64S
, mkU32(Irrm_ZERO
), get_ST(0)) );
5022 case 2: /* FST double-real */
5023 DIP("fstl %s\n", dis_buf
);
5024 storeLE(mkexpr(addr
), get_ST(0));
5027 case 3: /* FSTP double-real */
5028 DIP("fstpl %s\n", dis_buf
);
5029 storeLE(mkexpr(addr
), get_ST(0));
5033 case 4: { /* FRSTOR m108 */
5034 /* Uses dirty helper:
5035 VexEmNote x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */
5036 IRTemp ew
= newTemp(Ity_I32
);
5037 IRDirty
* d
= unsafeIRDirty_0_N (
5039 "x86g_dirtyhelper_FRSTOR",
5040 &x86g_dirtyhelper_FRSTOR
,
5041 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
5044 /* declare we're reading memory */
5046 d
->mAddr
= mkexpr(addr
);
5049 /* declare we're writing guest state */
5051 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
5053 d
->fxState
[0].fx
= Ifx_Write
;
5054 d
->fxState
[0].offset
= OFFB_FTOP
;
5055 d
->fxState
[0].size
= sizeof(UInt
);
5057 d
->fxState
[1].fx
= Ifx_Write
;
5058 d
->fxState
[1].offset
= OFFB_FPREGS
;
5059 d
->fxState
[1].size
= 8 * sizeof(ULong
);
5061 d
->fxState
[2].fx
= Ifx_Write
;
5062 d
->fxState
[2].offset
= OFFB_FPTAGS
;
5063 d
->fxState
[2].size
= 8 * sizeof(UChar
);
5065 d
->fxState
[3].fx
= Ifx_Write
;
5066 d
->fxState
[3].offset
= OFFB_FPROUND
;
5067 d
->fxState
[3].size
= sizeof(UInt
);
5069 d
->fxState
[4].fx
= Ifx_Write
;
5070 d
->fxState
[4].offset
= OFFB_FC3210
;
5071 d
->fxState
[4].size
= sizeof(UInt
);
5073 stmt( IRStmt_Dirty(d
) );
5075 /* ew contains any emulation warning we may need to
5076 issue. If needed, side-exit to the next insn,
5077 reporting the warning, so that Valgrind's dispatcher
5078 sees the warning. */
5079 put_emwarn( mkexpr(ew
) );
5082 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
5084 IRConst_U32( ((Addr32
)guest_EIP_bbstart
)+delta
),
5089 DIP("frstor %s\n", dis_buf
);
5093 case 6: { /* FNSAVE m108 */
5094 /* Uses dirty helper:
5095 void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */
5096 IRDirty
* d
= unsafeIRDirty_0_N (
5098 "x86g_dirtyhelper_FSAVE",
5099 &x86g_dirtyhelper_FSAVE
,
5100 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
5102 /* declare we're writing memory */
5104 d
->mAddr
= mkexpr(addr
);
5107 /* declare we're reading guest state */
5109 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
5111 d
->fxState
[0].fx
= Ifx_Read
;
5112 d
->fxState
[0].offset
= OFFB_FTOP
;
5113 d
->fxState
[0].size
= sizeof(UInt
);
5115 d
->fxState
[1].fx
= Ifx_Read
;
5116 d
->fxState
[1].offset
= OFFB_FPREGS
;
5117 d
->fxState
[1].size
= 8 * sizeof(ULong
);
5119 d
->fxState
[2].fx
= Ifx_Read
;
5120 d
->fxState
[2].offset
= OFFB_FPTAGS
;
5121 d
->fxState
[2].size
= 8 * sizeof(UChar
);
5123 d
->fxState
[3].fx
= Ifx_Read
;
5124 d
->fxState
[3].offset
= OFFB_FPROUND
;
5125 d
->fxState
[3].size
= sizeof(UInt
);
5127 d
->fxState
[4].fx
= Ifx_Read
;
5128 d
->fxState
[4].offset
= OFFB_FC3210
;
5129 d
->fxState
[4].size
= sizeof(UInt
);
5131 stmt( IRStmt_Dirty(d
) );
5133 DIP("fnsave %s\n", dis_buf
);
5137 case 7: { /* FNSTSW m16 */
5138 IRExpr
* sw
= get_FPU_sw();
5139 vassert(typeOfIRExpr(irsb
->tyenv
, sw
) == Ity_I16
);
5140 storeLE( mkexpr(addr
), sw
);
5141 DIP("fnstsw %s\n", dis_buf
);
5146 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
5147 vex_printf("first_opcode == 0xDD\n");
5154 case 0xC0 ... 0xC7: /* FFREE %st(?) */
5155 r_dst
= (UInt
)modrm
- 0xC0;
5156 DIP("ffree %%st(%u)\n", r_dst
);
5157 put_ST_TAG ( r_dst
, mkU8(0) );
5160 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
5161 r_dst
= (UInt
)modrm
- 0xD0;
5162 DIP("fst %%st(0),%%st(%u)\n", r_dst
);
5163 /* P4 manual says: "If the destination operand is a
5164 non-empty register, the invalid-operation exception
5165 is not generated. Hence put_ST_UNCHECKED. */
5166 put_ST_UNCHECKED(r_dst
, get_ST(0));
5169 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
5170 r_dst
= (UInt
)modrm
- 0xD8;
5171 DIP("fstp %%st(0),%%st(%u)\n", r_dst
);
5172 /* P4 manual says: "If the destination operand is a
5173 non-empty register, the invalid-operation exception
5174 is not generated. Hence put_ST_UNCHECKED. */
5175 put_ST_UNCHECKED(r_dst
, get_ST(0));
5179 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
5180 r_dst
= (UInt
)modrm
- 0xE0;
5181 DIP("fucom %%st(0),%%st(%u)\n", r_dst
);
5182 /* This forces C1 to zero, which isn't right. */
5186 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
5192 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
5193 r_dst
= (UInt
)modrm
- 0xE8;
5194 DIP("fucomp %%st(0),%%st(%u)\n", r_dst
);
5195 /* This forces C1 to zero, which isn't right. */
5199 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
5212 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
5214 if (first_opcode
== 0xDE) {
5218 /* bits 5,4,3 are an opcode extension, and the modRM also
5219 specifies an address. */
5221 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5224 switch (gregOfRM(modrm
)) {
5226 case 0: /* FIADD m16int */ /* ST(0) += m16int */
5227 DIP("fiaddw %s\n", dis_buf
);
5231 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
5232 DIP("fimulw %s\n", dis_buf
);
5236 case 2: /* FICOM m16int */
5237 DIP("ficomw %s\n", dis_buf
);
5238 /* This forces C1 to zero, which isn't right. */
5246 loadLE(Ity_I16
,mkexpr(addr
))))),
5252 case 3: /* FICOMP m16int */
5253 DIP("ficompw %s\n", dis_buf
);
5254 /* This forces C1 to zero, which isn't right. */
5262 loadLE(Ity_I16
,mkexpr(addr
))))),
5269 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
5270 DIP("fisubw %s\n", dis_buf
);
5274 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
5275 DIP("fisubrw %s\n", dis_buf
);
5279 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
5280 DIP("fisubw %s\n", dis_buf
);
5284 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
5285 DIP("fidivrw %s\n", dis_buf
);
5292 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5296 loadLE(Ity_I16
, mkexpr(addr
))))));
5302 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5305 loadLE(Ity_I16
, mkexpr(addr
)))),
5310 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
5311 vex_printf("first_opcode == 0xDE\n");
5320 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
5321 fp_do_op_ST_ST ( "add", Iop_AddF64
, 0, modrm
- 0xC0, True
);
5324 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
5325 fp_do_op_ST_ST ( "mul", Iop_MulF64
, 0, modrm
- 0xC8, True
);
5328 case 0xD9: /* FCOMPP %st(0),%st(1) */
5329 DIP("fuompp %%st(0),%%st(1)\n");
5330 /* This forces C1 to zero, which isn't right. */
5334 binop(Iop_CmpF64
, get_ST(0), get_ST(1)),
5342 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
5343 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, 0, modrm
- 0xE0, True
);
5346 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
5347 fp_do_op_ST_ST ( "sub", Iop_SubF64
, 0, modrm
- 0xE8, True
);
5350 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
5351 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, 0, modrm
- 0xF0, True
);
5354 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
5355 fp_do_op_ST_ST ( "div", Iop_DivF64
, 0, modrm
- 0xF8, True
);
5365 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
5367 if (first_opcode
== 0xDF) {
5371 /* bits 5,4,3 are an opcode extension, and the modRM also
5372 specifies an address. */
5373 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5376 switch (gregOfRM(modrm
)) {
5378 case 0: /* FILD m16int */
5379 DIP("fildw %s\n", dis_buf
);
5381 put_ST(0, unop(Iop_I32StoF64
,
5383 loadLE(Ity_I16
, mkexpr(addr
)))));
5386 case 1: /* FISTTPS m16 (SSE3) */
5387 DIP("fisttps %s\n", dis_buf
);
5388 storeLE( mkexpr(addr
),
5389 binop(Iop_F64toI16S
, mkU32(Irrm_ZERO
), get_ST(0)) );
5393 case 2: /* FIST m16 */
5394 DIP("fistp %s\n", dis_buf
);
5395 storeLE( mkexpr(addr
),
5396 binop(Iop_F64toI16S
, get_roundingmode(), get_ST(0)) );
5399 case 3: /* FISTP m16 */
5400 DIP("fistps %s\n", dis_buf
);
5401 storeLE( mkexpr(addr
),
5402 binop(Iop_F64toI16S
, get_roundingmode(), get_ST(0)) );
5406 case 5: /* FILD m64 */
5407 DIP("fildll %s\n", dis_buf
);
5409 put_ST(0, binop(Iop_I64StoF64
,
5411 loadLE(Ity_I64
, mkexpr(addr
))));
5414 case 7: /* FISTP m64 */
5415 DIP("fistpll %s\n", dis_buf
);
5416 storeLE( mkexpr(addr
),
5417 binop(Iop_F64toI64S
, get_roundingmode(), get_ST(0)) );
5422 vex_printf("unhandled opc_aux = 0x%2x\n", (UInt
)gregOfRM(modrm
));
5423 vex_printf("first_opcode == 0xDF\n");
5432 case 0xC0: /* FFREEP %st(0) */
5433 DIP("ffreep %%st(%d)\n", 0);
5434 put_ST_TAG ( 0, mkU8(0) );
5438 case 0xE0: /* FNSTSW %ax */
5439 DIP("fnstsw %%ax\n");
5440 /* Get the FPU status word value and dump it in %AX. */
5442 /* The obvious thing to do is simply dump the 16-bit
5443 status word value in %AX. However, due to a
5444 limitation in Memcheck's origin tracking
5445 machinery, this causes Memcheck not to track the
5446 origin of any undefinedness into %AH (only into
5447 %AL/%AX/%EAX), which means origins are lost in
5448 the sequence "fnstsw %ax; test $M,%ah; jcond .." */
5449 putIReg(2, R_EAX
, get_FPU_sw());
5451 /* So a somewhat lame kludge is to make it very
5452 clear to Memcheck that the value is written to
5453 both %AH and %AL. This generates marginally
5454 worse code, but I don't think it matters much. */
5455 IRTemp t16
= newTemp(Ity_I16
);
5456 assign(t16
, get_FPU_sw());
5457 putIReg( 1, R_AL
, unop(Iop_16to8
, mkexpr(t16
)) );
5458 putIReg( 1, R_AH
, unop(Iop_16HIto8
, mkexpr(t16
)) );
5462 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
5463 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xE8, True
);
5466 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
5467 /* not really right since COMIP != UCOMIP */
5468 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xF0, True
);
5479 vpanic("dis_FPU(x86): invalid primary opcode");
5490 /*------------------------------------------------------------*/
5492 /*--- MMX INSTRUCTIONS ---*/
5494 /*------------------------------------------------------------*/
5496 /* Effect of MMX insns on x87 FPU state (table 11-2 of
5497 IA32 arch manual, volume 3):
5499 Read from, or write to MMX register (viz, any insn except EMMS):
5500 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
5501 * FP stack pointer set to zero
5504 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
5505 * FP stack pointer set to zero
5508 static void do_MMX_preamble ( void )
5511 IRRegArray
* descr
= mkIRRegArray( OFFB_FPTAGS
, Ity_I8
, 8 );
5512 IRExpr
* zero
= mkU32(0);
5513 IRExpr
* tag1
= mkU8(1);
5515 for (i
= 0; i
< 8; i
++)
5516 stmt( IRStmt_PutI( mkIRPutI(descr
, zero
, i
, tag1
) ) );
5519 static void do_EMMS_preamble ( void )
5522 IRRegArray
* descr
= mkIRRegArray( OFFB_FPTAGS
, Ity_I8
, 8 );
5523 IRExpr
* zero
= mkU32(0);
5524 IRExpr
* tag0
= mkU8(0);
5526 for (i
= 0; i
< 8; i
++)
5527 stmt( IRStmt_PutI( mkIRPutI(descr
, zero
, i
, tag0
) ) );
5531 static IRExpr
* getMMXReg ( UInt archreg
)
5533 vassert(archreg
< 8);
5534 return IRExpr_Get( OFFB_FPREGS
+ 8 * archreg
, Ity_I64
);
5538 static void putMMXReg ( UInt archreg
, IRExpr
* e
)
5540 vassert(archreg
< 8);
5541 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I64
);
5542 stmt( IRStmt_Put( OFFB_FPREGS
+ 8 * archreg
, e
) );
5546 /* Helper for non-shift MMX insns. Note this is incomplete in the
5547 sense that it does not first call do_MMX_preamble() -- that is the
5548 responsibility of its caller. */
5551 UInt
dis_MMXop_regmem_to_reg ( UChar sorb
,
5555 Bool show_granularity
)
5558 UChar modrm
= getIByte(delta
);
5559 Bool isReg
= epartIsReg(modrm
);
5560 IRExpr
* argL
= NULL
;
5561 IRExpr
* argR
= NULL
;
5562 IRExpr
* argG
= NULL
;
5563 IRExpr
* argE
= NULL
;
5564 IRTemp res
= newTemp(Ity_I64
);
5567 IROp op
= Iop_INVALID
;
5570 const HChar
* hName
= NULL
;
5572 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
5575 /* Original MMX ones */
5576 case 0xFC: op
= Iop_Add8x8
; break;
5577 case 0xFD: op
= Iop_Add16x4
; break;
5578 case 0xFE: op
= Iop_Add32x2
; break;
5580 case 0xEC: op
= Iop_QAdd8Sx8
; break;
5581 case 0xED: op
= Iop_QAdd16Sx4
; break;
5583 case 0xDC: op
= Iop_QAdd8Ux8
; break;
5584 case 0xDD: op
= Iop_QAdd16Ux4
; break;
5586 case 0xF8: op
= Iop_Sub8x8
; break;
5587 case 0xF9: op
= Iop_Sub16x4
; break;
5588 case 0xFA: op
= Iop_Sub32x2
; break;
5590 case 0xE8: op
= Iop_QSub8Sx8
; break;
5591 case 0xE9: op
= Iop_QSub16Sx4
; break;
5593 case 0xD8: op
= Iop_QSub8Ux8
; break;
5594 case 0xD9: op
= Iop_QSub16Ux4
; break;
5596 case 0xE5: op
= Iop_MulHi16Sx4
; break;
5597 case 0xD5: op
= Iop_Mul16x4
; break;
5598 case 0xF5: XXX(x86g_calculate_mmx_pmaddwd
); break;
5600 case 0x74: op
= Iop_CmpEQ8x8
; break;
5601 case 0x75: op
= Iop_CmpEQ16x4
; break;
5602 case 0x76: op
= Iop_CmpEQ32x2
; break;
5604 case 0x64: op
= Iop_CmpGT8Sx8
; break;
5605 case 0x65: op
= Iop_CmpGT16Sx4
; break;
5606 case 0x66: op
= Iop_CmpGT32Sx2
; break;
5608 case 0x6B: op
= Iop_QNarrowBin32Sto16Sx4
; eLeft
= True
; break;
5609 case 0x63: op
= Iop_QNarrowBin16Sto8Sx8
; eLeft
= True
; break;
5610 case 0x67: op
= Iop_QNarrowBin16Sto8Ux8
; eLeft
= True
; break;
5612 case 0x68: op
= Iop_InterleaveHI8x8
; eLeft
= True
; break;
5613 case 0x69: op
= Iop_InterleaveHI16x4
; eLeft
= True
; break;
5614 case 0x6A: op
= Iop_InterleaveHI32x2
; eLeft
= True
; break;
5616 case 0x60: op
= Iop_InterleaveLO8x8
; eLeft
= True
; break;
5617 case 0x61: op
= Iop_InterleaveLO16x4
; eLeft
= True
; break;
5618 case 0x62: op
= Iop_InterleaveLO32x2
; eLeft
= True
; break;
5620 case 0xDB: op
= Iop_And64
; break;
5621 case 0xDF: op
= Iop_And64
; invG
= True
; break;
5622 case 0xEB: op
= Iop_Or64
; break;
5623 case 0xEF: /* Possibly do better here if argL and argR are the
5625 op
= Iop_Xor64
; break;
5627 /* Introduced in SSE1 */
5628 case 0xE0: op
= Iop_Avg8Ux8
; break;
5629 case 0xE3: op
= Iop_Avg16Ux4
; break;
5630 case 0xEE: op
= Iop_Max16Sx4
; break;
5631 case 0xDE: op
= Iop_Max8Ux8
; break;
5632 case 0xEA: op
= Iop_Min16Sx4
; break;
5633 case 0xDA: op
= Iop_Min8Ux8
; break;
5634 case 0xE4: op
= Iop_MulHi16Ux4
; break;
5635 case 0xF6: XXX(x86g_calculate_mmx_psadbw
); break;
5637 /* Introduced in SSE2 */
5638 case 0xD4: op
= Iop_Add64
; break;
5639 case 0xFB: op
= Iop_Sub64
; break;
5642 vex_printf("\n0x%x\n", opc
);
5643 vpanic("dis_MMXop_regmem_to_reg");
5648 argG
= getMMXReg(gregOfRM(modrm
));
5650 argG
= unop(Iop_Not64
, argG
);
5654 argE
= getMMXReg(eregOfRM(modrm
));
5657 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5659 argE
= loadLE(Ity_I64
, mkexpr(addr
));
5670 if (op
!= Iop_INVALID
) {
5671 vassert(hName
== NULL
);
5672 vassert(hAddr
== NULL
);
5673 assign(res
, binop(op
, argL
, argR
));
5675 vassert(hName
!= NULL
);
5676 vassert(hAddr
!= NULL
);
5680 0/*regparms*/, hName
, hAddr
,
5681 mkIRExprVec_2( argL
, argR
)
5686 putMMXReg( gregOfRM(modrm
), mkexpr(res
) );
5688 DIP("%s%s %s, %s\n",
5689 name
, show_granularity
? nameMMXGran(opc
& 3) : "",
5690 ( isReg
? nameMMXReg(eregOfRM(modrm
)) : dis_buf
),
5691 nameMMXReg(gregOfRM(modrm
)) );
5697 /* Vector by scalar shift of G by the amount specified at the bottom
5698 of E. This is a straight copy of dis_SSE_shiftG_byE. */
5700 static UInt
dis_MMX_shiftG_byE ( UChar sorb
, Int delta
,
5701 const HChar
* opname
, IROp op
)
5707 UChar rm
= getIByte(delta
);
5708 IRTemp g0
= newTemp(Ity_I64
);
5709 IRTemp g1
= newTemp(Ity_I64
);
5710 IRTemp amt
= newTemp(Ity_I32
);
5711 IRTemp amt8
= newTemp(Ity_I8
);
5713 if (epartIsReg(rm
)) {
5714 assign( amt
, unop(Iop_64to32
, getMMXReg(eregOfRM(rm
))) );
5715 DIP("%s %s,%s\n", opname
,
5716 nameMMXReg(eregOfRM(rm
)),
5717 nameMMXReg(gregOfRM(rm
)) );
5720 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
5721 assign( amt
, loadLE(Ity_I32
, mkexpr(addr
)) );
5722 DIP("%s %s,%s\n", opname
,
5724 nameMMXReg(gregOfRM(rm
)) );
5727 assign( g0
, getMMXReg(gregOfRM(rm
)) );
5728 assign( amt8
, unop(Iop_32to8
, mkexpr(amt
)) );
5730 shl
= shr
= sar
= False
;
5733 case Iop_ShlN16x4
: shl
= True
; size
= 32; break;
5734 case Iop_ShlN32x2
: shl
= True
; size
= 32; break;
5735 case Iop_Shl64
: shl
= True
; size
= 64; break;
5736 case Iop_ShrN16x4
: shr
= True
; size
= 16; break;
5737 case Iop_ShrN32x2
: shr
= True
; size
= 32; break;
5738 case Iop_Shr64
: shr
= True
; size
= 64; break;
5739 case Iop_SarN16x4
: sar
= True
; size
= 16; break;
5740 case Iop_SarN32x2
: sar
= True
; size
= 32; break;
5741 default: vassert(0);
5748 binop(Iop_CmpLT32U
,mkexpr(amt
),mkU32(size
)),
5749 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
5758 binop(Iop_CmpLT32U
,mkexpr(amt
),mkU32(size
)),
5759 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
5760 binop(op
, mkexpr(g0
), mkU8(size
-1))
5768 putMMXReg( gregOfRM(rm
), mkexpr(g1
) );
5773 /* Vector by scalar shift of E by an immediate byte. This is a
5774 straight copy of dis_SSE_shiftE_imm. */
5777 UInt
dis_MMX_shiftE_imm ( Int delta
, const HChar
* opname
, IROp op
)
5780 UChar rm
= getIByte(delta
);
5781 IRTemp e0
= newTemp(Ity_I64
);
5782 IRTemp e1
= newTemp(Ity_I64
);
5784 vassert(epartIsReg(rm
));
5785 vassert(gregOfRM(rm
) == 2
5786 || gregOfRM(rm
) == 4 || gregOfRM(rm
) == 6);
5787 amt
= getIByte(delta
+1);
5789 DIP("%s $%d,%s\n", opname
,
5791 nameMMXReg(eregOfRM(rm
)) );
5793 assign( e0
, getMMXReg(eregOfRM(rm
)) );
5795 shl
= shr
= sar
= False
;
5798 case Iop_ShlN16x4
: shl
= True
; size
= 16; break;
5799 case Iop_ShlN32x2
: shl
= True
; size
= 32; break;
5800 case Iop_Shl64
: shl
= True
; size
= 64; break;
5801 case Iop_SarN16x4
: sar
= True
; size
= 16; break;
5802 case Iop_SarN32x2
: sar
= True
; size
= 32; break;
5803 case Iop_ShrN16x4
: shr
= True
; size
= 16; break;
5804 case Iop_ShrN32x2
: shr
= True
; size
= 32; break;
5805 case Iop_Shr64
: shr
= True
; size
= 64; break;
5806 default: vassert(0);
5810 assign( e1
, amt
>= size
5812 : binop(op
, mkexpr(e0
), mkU8(amt
))
5816 assign( e1
, amt
>= size
5817 ? binop(op
, mkexpr(e0
), mkU8(size
-1))
5818 : binop(op
, mkexpr(e0
), mkU8(amt
))
5825 putMMXReg( eregOfRM(rm
), mkexpr(e1
) );
5830 /* Completely handle all MMX instructions except emms. */
5833 UInt
dis_MMX ( Bool
* decode_ok
, UChar sorb
, Int sz
, Int delta
)
5838 UChar opc
= getIByte(delta
);
5841 /* dis_MMX handles all insns except emms. */
5847 /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/
5849 goto mmx_decode_failure
;
5850 modrm
= getIByte(delta
);
5851 if (epartIsReg(modrm
)) {
5855 binop( Iop_32HLto64
,
5857 getIReg(4, eregOfRM(modrm
)) ) );
5858 DIP("movd %s, %s\n",
5859 nameIReg(4,eregOfRM(modrm
)), nameMMXReg(gregOfRM(modrm
)));
5861 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5865 binop( Iop_32HLto64
,
5867 loadLE(Ity_I32
, mkexpr(addr
)) ) );
5868 DIP("movd %s, %s\n", dis_buf
, nameMMXReg(gregOfRM(modrm
)));
5872 case 0x7E: /* MOVD (src)mmxreg (G), (dst)ireg-or-mem (E) */
5874 goto mmx_decode_failure
;
5875 modrm
= getIByte(delta
);
5876 if (epartIsReg(modrm
)) {
5878 putIReg( 4, eregOfRM(modrm
),
5879 unop(Iop_64to32
, getMMXReg(gregOfRM(modrm
)) ) );
5880 DIP("movd %s, %s\n",
5881 nameMMXReg(gregOfRM(modrm
)), nameIReg(4,eregOfRM(modrm
)));
5883 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5885 storeLE( mkexpr(addr
),
5886 unop(Iop_64to32
, getMMXReg(gregOfRM(modrm
)) ) );
5887 DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm
)), dis_buf
);
5892 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
5894 goto mmx_decode_failure
;
5895 modrm
= getIByte(delta
);
5896 if (epartIsReg(modrm
)) {
5898 putMMXReg( gregOfRM(modrm
), getMMXReg(eregOfRM(modrm
)) );
5899 DIP("movq %s, %s\n",
5900 nameMMXReg(eregOfRM(modrm
)), nameMMXReg(gregOfRM(modrm
)));
5902 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5904 putMMXReg( gregOfRM(modrm
), loadLE(Ity_I64
, mkexpr(addr
)) );
5905 DIP("movq %s, %s\n",
5906 dis_buf
, nameMMXReg(gregOfRM(modrm
)));
5911 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
5913 goto mmx_decode_failure
;
5914 modrm
= getIByte(delta
);
5915 if (epartIsReg(modrm
)) {
5917 putMMXReg( eregOfRM(modrm
), getMMXReg(gregOfRM(modrm
)) );
5918 DIP("movq %s, %s\n",
5919 nameMMXReg(gregOfRM(modrm
)), nameMMXReg(eregOfRM(modrm
)));
5921 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
5923 storeLE( mkexpr(addr
), getMMXReg(gregOfRM(modrm
)) );
5924 DIP("mov(nt)q %s, %s\n",
5925 nameMMXReg(gregOfRM(modrm
)), dis_buf
);
5931 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
5933 goto mmx_decode_failure
;
5934 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "padd", True
);
5938 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
5940 goto mmx_decode_failure
;
5941 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "padds", True
);
5945 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
5947 goto mmx_decode_failure
;
5948 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "paddus", True
);
5953 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
5955 goto mmx_decode_failure
;
5956 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "psub", True
);
5960 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
5962 goto mmx_decode_failure
;
5963 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "psubs", True
);
5967 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
5969 goto mmx_decode_failure
;
5970 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "psubus", True
);
5973 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
5975 goto mmx_decode_failure
;
5976 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pmulhw", False
);
5979 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
5981 goto mmx_decode_failure
;
5982 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pmullw", False
);
5985 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
5987 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pmaddwd", False
);
5992 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
5994 goto mmx_decode_failure
;
5995 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pcmpeq", True
);
6000 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
6002 goto mmx_decode_failure
;
6003 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pcmpgt", True
);
6006 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
6008 goto mmx_decode_failure
;
6009 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "packssdw", False
);
6012 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
6014 goto mmx_decode_failure
;
6015 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "packsswb", False
);
6018 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
6020 goto mmx_decode_failure
;
6021 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "packuswb", False
);
6026 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
6028 goto mmx_decode_failure
;
6029 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "punpckh", True
);
6034 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
6036 goto mmx_decode_failure
;
6037 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "punpckl", True
);
6040 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
6042 goto mmx_decode_failure
;
6043 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pand", False
);
6046 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
6048 goto mmx_decode_failure
;
6049 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pandn", False
);
6052 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
6054 goto mmx_decode_failure
;
6055 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "por", False
);
6058 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
6060 goto mmx_decode_failure
;
6061 delta
= dis_MMXop_regmem_to_reg ( sorb
, delta
, opc
, "pxor", False
);
6064 # define SHIFT_BY_REG(_name,_op) \
6065 delta = dis_MMX_shiftG_byE(sorb, delta, _name, _op); \
6068 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
6069 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4
);
6070 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2
);
6071 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64
);
6073 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
6074 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4
);
6075 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2
);
6076 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64
);
6078 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
6079 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4
);
6080 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2
);
6082 # undef SHIFT_BY_REG
6087 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
6088 UChar byte2
, subopc
;
6090 goto mmx_decode_failure
;
6091 byte2
= getIByte(delta
); /* amode / sub-opcode */
6092 subopc
= toUChar( (byte2
>> 3) & 7 );
6094 # define SHIFT_BY_IMM(_name,_op) \
6095 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
6098 if (subopc
== 2 /*SRL*/ && opc
== 0x71)
6099 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4
);
6100 else if (subopc
== 2 /*SRL*/ && opc
== 0x72)
6101 SHIFT_BY_IMM("psrld", Iop_ShrN32x2
);
6102 else if (subopc
== 2 /*SRL*/ && opc
== 0x73)
6103 SHIFT_BY_IMM("psrlq", Iop_Shr64
);
6105 else if (subopc
== 4 /*SAR*/ && opc
== 0x71)
6106 SHIFT_BY_IMM("psraw", Iop_SarN16x4
);
6107 else if (subopc
== 4 /*SAR*/ && opc
== 0x72)
6108 SHIFT_BY_IMM("psrad", Iop_SarN32x2
);
6110 else if (subopc
== 6 /*SHL*/ && opc
== 0x71)
6111 SHIFT_BY_IMM("psllw", Iop_ShlN16x4
);
6112 else if (subopc
== 6 /*SHL*/ && opc
== 0x72)
6113 SHIFT_BY_IMM("pslld", Iop_ShlN32x2
);
6114 else if (subopc
== 6 /*SHL*/ && opc
== 0x73)
6115 SHIFT_BY_IMM("psllq", Iop_Shl64
);
6117 else goto mmx_decode_failure
;
6119 # undef SHIFT_BY_IMM
6124 IRTemp addr
= newTemp(Ity_I32
);
6125 IRTemp regD
= newTemp(Ity_I64
);
6126 IRTemp regM
= newTemp(Ity_I64
);
6127 IRTemp mask
= newTemp(Ity_I64
);
6128 IRTemp olddata
= newTemp(Ity_I64
);
6129 IRTemp newdata
= newTemp(Ity_I64
);
6131 modrm
= getIByte(delta
);
6132 if (sz
!= 4 || (!epartIsReg(modrm
)))
6133 goto mmx_decode_failure
;
6136 assign( addr
, handleSegOverride( sorb
, getIReg(4, R_EDI
) ));
6137 assign( regM
, getMMXReg( eregOfRM(modrm
) ));
6138 assign( regD
, getMMXReg( gregOfRM(modrm
) ));
6139 assign( mask
, binop(Iop_SarN8x8
, mkexpr(regM
), mkU8(7)) );
6140 assign( olddata
, loadLE( Ity_I64
, mkexpr(addr
) ));
6148 unop(Iop_Not64
, mkexpr(mask
)))) );
6149 storeLE( mkexpr(addr
), mkexpr(newdata
) );
6150 DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm
) ),
6151 nameMMXReg( gregOfRM(modrm
) ) );
6155 /* --- MMX decode failure --- */
6159 return delta
; /* ignored */
6168 /*------------------------------------------------------------*/
6169 /*--- More misc arithmetic and other obscure insns. ---*/
6170 /*------------------------------------------------------------*/
6172 /* Double length left and right shifts. Apparently only required in
6173 v-size (no b- variant). */
6175 UInt
dis_SHLRD_Gv_Ev ( UChar sorb
,
6176 Int delta
, UChar modrm
,
6179 Bool amt_is_literal
,
6180 const HChar
* shift_amt_txt
,
6183 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
6184 for printing it. And eip on entry points at the modrm byte. */
6188 IRType ty
= szToITy(sz
);
6189 IRTemp gsrc
= newTemp(ty
);
6190 IRTemp esrc
= newTemp(ty
);
6191 IRTemp addr
= IRTemp_INVALID
;
6192 IRTemp tmpSH
= newTemp(Ity_I8
);
6193 IRTemp tmpL
= IRTemp_INVALID
;
6194 IRTemp tmpRes
= IRTemp_INVALID
;
6195 IRTemp tmpSubSh
= IRTemp_INVALID
;
6199 IRExpr
* mask
= NULL
;
6201 vassert(sz
== 2 || sz
== 4);
6203 /* The E-part is the destination; this is shifted. The G-part
6204 supplies bits to be shifted into the E-part, but is not
6207 If shifting left, form a double-length word with E at the top
6208 and G at the bottom, and shift this left. The result is then in
6211 If shifting right, form a double-length word with G at the top
6212 and E at the bottom, and shift this right. The result is then
6215 /* Fetch the operands. */
6217 assign( gsrc
, getIReg(sz
, gregOfRM(modrm
)) );
6219 if (epartIsReg(modrm
)) {
6221 assign( esrc
, getIReg(sz
, eregOfRM(modrm
)) );
6222 DIP("sh%cd%c %s, %s, %s\n",
6223 ( left_shift
? 'l' : 'r' ), nameISize(sz
),
6225 nameIReg(sz
, gregOfRM(modrm
)), nameIReg(sz
, eregOfRM(modrm
)));
6227 addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
6229 assign( esrc
, loadLE(ty
, mkexpr(addr
)) );
6230 DIP("sh%cd%c %s, %s, %s\n",
6231 ( left_shift
? 'l' : 'r' ), nameISize(sz
),
6233 nameIReg(sz
, gregOfRM(modrm
)), dis_buf
);
6236 /* Round up the relevant primops. */
6239 tmpL
= newTemp(Ity_I64
);
6240 tmpRes
= newTemp(Ity_I32
);
6241 tmpSubSh
= newTemp(Ity_I32
);
6242 mkpair
= Iop_32HLto64
;
6243 getres
= left_shift
? Iop_64HIto32
: Iop_64to32
;
6244 shift
= left_shift
? Iop_Shl64
: Iop_Shr64
;
6248 tmpL
= newTemp(Ity_I32
);
6249 tmpRes
= newTemp(Ity_I16
);
6250 tmpSubSh
= newTemp(Ity_I16
);
6251 mkpair
= Iop_16HLto32
;
6252 getres
= left_shift
? Iop_32HIto16
: Iop_32to16
;
6253 shift
= left_shift
? Iop_Shl32
: Iop_Shr32
;
6257 /* Do the shift, calculate the subshift value, and set
6260 assign( tmpSH
, binop(Iop_And8
, shift_amt
, mask
) );
6263 assign( tmpL
, binop(mkpair
, mkexpr(esrc
), mkexpr(gsrc
)) );
6265 assign( tmpL
, binop(mkpair
, mkexpr(gsrc
), mkexpr(esrc
)) );
6267 assign( tmpRes
, unop(getres
, binop(shift
, mkexpr(tmpL
), mkexpr(tmpSH
)) ) );
6273 binop(Iop_Sub8
, mkexpr(tmpSH
), mkU8(1) ),
6276 setFlags_DEP1_DEP2_shift ( left_shift
? Iop_Shl32
: Iop_Sar32
,
6277 tmpRes
, tmpSubSh
, ty
, tmpSH
);
6279 /* Put result back. */
6281 if (epartIsReg(modrm
)) {
6282 putIReg(sz
, eregOfRM(modrm
), mkexpr(tmpRes
));
6284 storeLE( mkexpr(addr
), mkexpr(tmpRes
) );
6287 if (amt_is_literal
) delta
++;
/* Handle BT/BTS/BTR/BTC Gv, Ev.  Apparently b-size is not
   necessary. */
/* Which variant of the bit-test family an opcode selects: plain test
   (BT), test-and-set (BTS), test-and-reset (BTR), or
   test-and-complement (BTC). */
typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
6297 static const HChar
* nameBtOp ( BtOp op
)
6300 case BtOpNone
: return "";
6301 case BtOpSet
: return "s";
6302 case BtOpReset
: return "r";
6303 case BtOpComp
: return "c";
6304 default: vpanic("nameBtOp(x86)");
6310 UInt
dis_bt_G_E ( const VexAbiInfo
* vbi
,
6311 UChar sorb
, Bool locked
, Int sz
, Int delta
, BtOp op
)
6316 IRTemp t_fetched
, t_bitno0
, t_bitno1
, t_bitno2
, t_addr0
,
6317 t_addr1
, t_esp
, t_mask
, t_new
;
6319 vassert(sz
== 2 || sz
== 4);
6321 t_fetched
= t_bitno0
= t_bitno1
= t_bitno2
6322 = t_addr0
= t_addr1
= t_esp
6323 = t_mask
= t_new
= IRTemp_INVALID
;
6325 t_fetched
= newTemp(Ity_I8
);
6326 t_new
= newTemp(Ity_I8
);
6327 t_bitno0
= newTemp(Ity_I32
);
6328 t_bitno1
= newTemp(Ity_I32
);
6329 t_bitno2
= newTemp(Ity_I8
);
6330 t_addr1
= newTemp(Ity_I32
);
6331 modrm
= getIByte(delta
);
6333 assign( t_bitno0
, widenSto32(getIReg(sz
, gregOfRM(modrm
))) );
6335 if (epartIsReg(modrm
)) {
6337 /* Get it onto the client's stack. */
6338 t_esp
= newTemp(Ity_I32
);
6339 t_addr0
= newTemp(Ity_I32
);
6341 /* For the choice of the value 128, see comment in dis_bt_G_E in
6342 guest_amd64_toIR.c. We point out here only that 128 is
6343 fast-cased in Memcheck and is > 0, so seems like a good
6345 vassert(vbi
->guest_stack_redzone_size
== 0);
6346 assign( t_esp
, binop(Iop_Sub32
, getIReg(4, R_ESP
), mkU32(128)) );
6347 putIReg(4, R_ESP
, mkexpr(t_esp
));
6349 storeLE( mkexpr(t_esp
), getIReg(sz
, eregOfRM(modrm
)) );
6351 /* Make t_addr0 point at it. */
6352 assign( t_addr0
, mkexpr(t_esp
) );
6354 /* Mask out upper bits of the shift amount, since we're doing a
6356 assign( t_bitno1
, binop(Iop_And32
,
6358 mkU32(sz
== 4 ? 31 : 15)) );
6361 t_addr0
= disAMode ( &len
, sorb
, delta
, dis_buf
);
6363 assign( t_bitno1
, mkexpr(t_bitno0
) );
6366 /* At this point: t_addr0 is the address being operated on. If it
6367 was a reg, we will have pushed it onto the client's stack.
6368 t_bitno1 is the bit number, suitably masked in the case of a
6371 /* Now the main sequence. */
6375 binop(Iop_Sar32
, mkexpr(t_bitno1
), mkU8(3))) );
6377 /* t_addr1 now holds effective address */
6381 binop(Iop_And32
, mkexpr(t_bitno1
), mkU32(7))) );
6383 /* t_bitno2 contains offset of bit within byte */
6385 if (op
!= BtOpNone
) {
6386 t_mask
= newTemp(Ity_I8
);
6387 assign( t_mask
, binop(Iop_Shl8
, mkU8(1), mkexpr(t_bitno2
)) );
6390 /* t_mask is now a suitable byte mask */
6392 assign( t_fetched
, loadLE(Ity_I8
, mkexpr(t_addr1
)) );
6394 if (op
!= BtOpNone
) {
6398 binop(Iop_Or8
, mkexpr(t_fetched
), mkexpr(t_mask
)) );
6402 binop(Iop_Xor8
, mkexpr(t_fetched
), mkexpr(t_mask
)) );
6406 binop(Iop_And8
, mkexpr(t_fetched
),
6407 unop(Iop_Not8
, mkexpr(t_mask
))) );
6410 vpanic("dis_bt_G_E(x86)");
6412 if (locked
&& !epartIsReg(modrm
)) {
6413 casLE( mkexpr(t_addr1
), mkexpr(t_fetched
)/*expd*/,
6414 mkexpr(t_new
)/*new*/,
6415 guest_EIP_curr_instr
);
6417 storeLE( mkexpr(t_addr1
), mkexpr(t_new
) );
6421 /* Side effect done; now get selected bit into Carry flag */
6422 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
6423 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
6424 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
6429 unop(Iop_8Uto32
, mkexpr(t_fetched
)),
6433 /* Set NDEP even though it isn't used. This makes redundant-PUT
6434 elimination of previous stores to this field work better. */
6435 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
6437 /* Move reg operand from stack back to reg */
6438 if (epartIsReg(modrm
)) {
6439 /* t_esp still points at it. */
6440 putIReg(sz
, eregOfRM(modrm
), loadLE(szToITy(sz
), mkexpr(t_esp
)) );
6441 putIReg(4, R_ESP
, binop(Iop_Add32
, mkexpr(t_esp
), mkU32(128)) );
6444 DIP("bt%s%c %s, %s\n",
6445 nameBtOp(op
), nameISize(sz
), nameIReg(sz
, gregOfRM(modrm
)),
6446 ( epartIsReg(modrm
) ? nameIReg(sz
, eregOfRM(modrm
)) : dis_buf
) );
6453 /* Handle BSF/BSR. Only v-size seems necessary. */
6455 UInt
dis_bs_E_G ( UChar sorb
, Int sz
, Int delta
, Bool fwds
)
6461 IRType ty
= szToITy(sz
);
6462 IRTemp src
= newTemp(ty
);
6463 IRTemp dst
= newTemp(ty
);
6465 IRTemp src32
= newTemp(Ity_I32
);
6466 IRTemp dst32
= newTemp(Ity_I32
);
6467 IRTemp srcB
= newTemp(Ity_I1
);
6469 vassert(sz
== 4 || sz
== 2);
6471 modrm
= getIByte(delta
);
6473 isReg
= epartIsReg(modrm
);
6476 assign( src
, getIReg(sz
, eregOfRM(modrm
)) );
6479 IRTemp addr
= disAMode( &len
, sorb
, delta
, dis_buf
);
6481 assign( src
, loadLE(ty
, mkexpr(addr
)) );
6484 DIP("bs%c%c %s, %s\n",
6485 fwds
? 'f' : 'r', nameISize(sz
),
6486 ( isReg
? nameIReg(sz
, eregOfRM(modrm
)) : dis_buf
),
6487 nameIReg(sz
, gregOfRM(modrm
)));
6489 /* Generate a bool expression which is zero iff the original is
6490 zero, and nonzero otherwise. Ask for a CmpNE version which, if
6491 instrumented by Memcheck, is instrumented expensively, since
6492 this may be used on the output of a preceding movmskb insn,
6493 which has been known to be partially defined, and in need of
6494 careful handling. */
6495 assign( srcB
, binop(mkSizedOp(ty
,Iop_ExpCmpNE8
),
6496 mkexpr(src
), mkU(ty
,0)) );
6498 /* Flags: Z is 1 iff source value is zero. All others
6499 are undefined -- we force them to zero. */
6500 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
6501 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
6504 IRExpr_ITE( mkexpr(srcB
),
6508 mkU32(X86G_CC_MASK_Z
)
6511 /* Set NDEP even though it isn't used. This makes redundant-PUT
6512 elimination of previous stores to this field work better. */
6513 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
6515 /* Result: iff source value is zero, we can't use
6516 Iop_Clz32/Iop_Ctz32 as they have no defined result in that case.
6517 But anyway, Intel x86 semantics say the result is undefined in
6518 such situations. Hence handle the zero case specially. */
6520 /* Bleh. What we compute:
6522 bsf32: if src == 0 then 0 else Ctz32(src)
6523 bsr32: if src == 0 then 0 else 31 - Clz32(src)
6525 bsf16: if src == 0 then 0 else Ctz32(16Uto32(src))
6526 bsr16: if src == 0 then 0 else 31 - Clz32(16Uto32(src))
6528 First, widen src to 32 bits if it is not already.
6530 Postscript 15 Oct 04: it seems that at least VIA Nehemiah leaves the
6531 dst register unchanged when src == 0. Hence change accordingly.
6534 assign( src32
, unop(Iop_16Uto32
, mkexpr(src
)) );
6536 assign( src32
, mkexpr(src
) );
6538 /* The main computation, guarding against zero. */
6543 fwds
? unop(Iop_Ctz32
, mkexpr(src32
))
6546 unop(Iop_Clz32
, mkexpr(src32
))),
6547 /* src == 0 -- leave dst unchanged */
6548 widenUto32( getIReg( sz
, gregOfRM(modrm
) ) )
6553 assign( dst
, unop(Iop_32to16
, mkexpr(dst32
)) );
6555 assign( dst
, mkexpr(dst32
) );
6557 /* dump result back */
6558 putIReg( sz
, gregOfRM(modrm
), mkexpr(dst
) );
6565 void codegen_xchg_eAX_Reg ( Int sz
, Int reg
)
6567 IRType ty
= szToITy(sz
);
6568 IRTemp t1
= newTemp(ty
);
6569 IRTemp t2
= newTemp(ty
);
6570 vassert(sz
== 2 || sz
== 4);
6571 assign( t1
, getIReg(sz
, R_EAX
) );
6572 assign( t2
, getIReg(sz
, reg
) );
6573 putIReg( sz
, R_EAX
, mkexpr(t2
) );
6574 putIReg( sz
, reg
, mkexpr(t1
) );
6575 DIP("xchg%c %s, %s\n",
6576 nameISize(sz
), nameIReg(sz
, R_EAX
), nameIReg(sz
, reg
));
6581 void codegen_SAHF ( void )
6583 /* Set the flags to:
6584 (x86g_calculate_flags_all() & X86G_CC_MASK_O) -- retain the old O flag
6585 | (%AH & (X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
6586 |X86G_CC_MASK_P|X86G_CC_MASK_C)
6588 UInt mask_SZACP
= X86G_CC_MASK_S
|X86G_CC_MASK_Z
|X86G_CC_MASK_A
6589 |X86G_CC_MASK_C
|X86G_CC_MASK_P
;
6590 IRTemp oldflags
= newTemp(Ity_I32
);
6591 assign( oldflags
, mk_x86g_calculate_eflags_all() );
6592 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
6593 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
6594 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
6595 stmt( IRStmt_Put( OFFB_CC_DEP1
,
6597 binop(Iop_And32
, mkexpr(oldflags
), mkU32(X86G_CC_MASK_O
)),
6599 binop(Iop_Shr32
, getIReg(4, R_EAX
), mkU8(8)),
6603 /* Set NDEP even though it isn't used. This makes redundant-PUT
6604 elimination of previous stores to this field work better. */
6605 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
6610 void codegen_LAHF ( void )
6612 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
6613 IRExpr
* eax_with_hole
;
6616 UInt mask_SZACP
= X86G_CC_MASK_S
|X86G_CC_MASK_Z
|X86G_CC_MASK_A
6617 |X86G_CC_MASK_C
|X86G_CC_MASK_P
;
6619 IRTemp flags
= newTemp(Ity_I32
);
6620 assign( flags
, mk_x86g_calculate_eflags_all() );
6623 = binop(Iop_And32
, getIReg(4, R_EAX
), mkU32(0xFFFF00FF));
6625 = binop(Iop_Or32
, binop(Iop_And32
, mkexpr(flags
), mkU32(mask_SZACP
)),
6628 = binop(Iop_Or32
, eax_with_hole
,
6629 binop(Iop_Shl32
, new_byte
, mkU8(8)));
6630 putIReg(4, R_EAX
, new_eax
);
6635 UInt
dis_cmpxchg_G_E ( UChar sorb
,
6643 IRType ty
= szToITy(size
);
6644 IRTemp acc
= newTemp(ty
);
6645 IRTemp src
= newTemp(ty
);
6646 IRTemp dest
= newTemp(ty
);
6647 IRTemp dest2
= newTemp(ty
);
6648 IRTemp acc2
= newTemp(ty
);
6649 IRTemp cond
= newTemp(Ity_I1
);
6650 IRTemp addr
= IRTemp_INVALID
;
6651 UChar rm
= getUChar(delta0
);
6653 /* There are 3 cases to consider:
6655 reg-reg: ignore any lock prefix, generate sequence based
6658 reg-mem, not locked: ignore any lock prefix, generate sequence
6661 reg-mem, locked: use IRCAS
6663 if (epartIsReg(rm
)) {
6665 assign( dest
, getIReg(size
, eregOfRM(rm
)) );
6667 assign( src
, getIReg(size
, gregOfRM(rm
)) );
6668 assign( acc
, getIReg(size
, R_EAX
) );
6669 setFlags_DEP1_DEP2(Iop_Sub8
, acc
, dest
, ty
);
6670 assign( cond
, mk_x86g_calculate_condition(X86CondZ
) );
6671 assign( dest2
, IRExpr_ITE(mkexpr(cond
), mkexpr(src
), mkexpr(dest
)) );
6672 assign( acc2
, IRExpr_ITE(mkexpr(cond
), mkexpr(acc
), mkexpr(dest
)) );
6673 putIReg(size
, R_EAX
, mkexpr(acc2
));
6674 putIReg(size
, eregOfRM(rm
), mkexpr(dest2
));
6675 DIP("cmpxchg%c %s,%s\n", nameISize(size
),
6676 nameIReg(size
,gregOfRM(rm
)),
6677 nameIReg(size
,eregOfRM(rm
)) );
6679 else if (!epartIsReg(rm
) && !locked
) {
6681 addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
6682 assign( dest
, loadLE(ty
, mkexpr(addr
)) );
6684 assign( src
, getIReg(size
, gregOfRM(rm
)) );
6685 assign( acc
, getIReg(size
, R_EAX
) );
6686 setFlags_DEP1_DEP2(Iop_Sub8
, acc
, dest
, ty
);
6687 assign( cond
, mk_x86g_calculate_condition(X86CondZ
) );
6688 assign( dest2
, IRExpr_ITE(mkexpr(cond
), mkexpr(src
), mkexpr(dest
)) );
6689 assign( acc2
, IRExpr_ITE(mkexpr(cond
), mkexpr(acc
), mkexpr(dest
)) );
6690 putIReg(size
, R_EAX
, mkexpr(acc2
));
6691 storeLE( mkexpr(addr
), mkexpr(dest2
) );
6692 DIP("cmpxchg%c %s,%s\n", nameISize(size
),
6693 nameIReg(size
,gregOfRM(rm
)), dis_buf
);
6695 else if (!epartIsReg(rm
) && locked
) {
6697 /* src is new value. acc is expected value. dest is old value.
6698 Compute success from the output of the IRCAS, and steer the
6699 new value for EAX accordingly: in case of success, EAX is
6701 addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
6703 assign( src
, getIReg(size
, gregOfRM(rm
)) );
6704 assign( acc
, getIReg(size
, R_EAX
) );
6706 mkIRCAS( IRTemp_INVALID
, dest
, Iend_LE
, mkexpr(addr
),
6707 NULL
, mkexpr(acc
), NULL
, mkexpr(src
) )
6709 setFlags_DEP1_DEP2(Iop_Sub8
, acc
, dest
, ty
);
6710 assign( cond
, mk_x86g_calculate_condition(X86CondZ
) );
6711 assign( acc2
, IRExpr_ITE(mkexpr(cond
), mkexpr(acc
), mkexpr(dest
)) );
6712 putIReg(size
, R_EAX
, mkexpr(acc2
));
6713 DIP("cmpxchg%c %s,%s\n", nameISize(size
),
6714 nameIReg(size
,gregOfRM(rm
)), dis_buf
);
6722 /* Handle conditional move instructions of the form
6723 cmovcc E(reg-or-mem), G(reg)
6725 E(src) is reg-or-mem
6728 If E is reg, --> GET %E, tmps
6733 If E is mem --> (getAddr E) -> tmpa
6740 UInt
dis_cmov_E_G ( UChar sorb
,
6745 UChar rm
= getIByte(delta0
);
6749 IRType ty
= szToITy(sz
);
6750 IRTemp tmps
= newTemp(ty
);
6751 IRTemp tmpd
= newTemp(ty
);
6753 if (epartIsReg(rm
)) {
6754 assign( tmps
, getIReg(sz
, eregOfRM(rm
)) );
6755 assign( tmpd
, getIReg(sz
, gregOfRM(rm
)) );
6757 putIReg(sz
, gregOfRM(rm
),
6758 IRExpr_ITE( mk_x86g_calculate_condition(cond
),
6762 DIP("cmov%c%s %s,%s\n", nameISize(sz
),
6763 name_X86Condcode(cond
),
6764 nameIReg(sz
,eregOfRM(rm
)),
6765 nameIReg(sz
,gregOfRM(rm
)));
6769 /* E refers to memory */
6771 IRTemp addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
6772 assign( tmps
, loadLE(ty
, mkexpr(addr
)) );
6773 assign( tmpd
, getIReg(sz
, gregOfRM(rm
)) );
6775 putIReg(sz
, gregOfRM(rm
),
6776 IRExpr_ITE( mk_x86g_calculate_condition(cond
),
6781 DIP("cmov%c%s %s,%s\n", nameISize(sz
),
6782 name_X86Condcode(cond
),
6784 nameIReg(sz
,gregOfRM(rm
)));
6791 UInt
dis_xadd_G_E ( UChar sorb
, Bool locked
, Int sz
, Int delta0
,
6795 UChar rm
= getIByte(delta0
);
6798 IRType ty
= szToITy(sz
);
6799 IRTemp tmpd
= newTemp(ty
);
6800 IRTemp tmpt0
= newTemp(ty
);
6801 IRTemp tmpt1
= newTemp(ty
);
6803 /* There are 3 cases to consider:
6805 reg-reg: ignore any lock prefix,
6806 generate 'naive' (non-atomic) sequence
6808 reg-mem, not locked: ignore any lock prefix, generate 'naive'
6809 (non-atomic) sequence
6811 reg-mem, locked: use IRCAS
6814 if (epartIsReg(rm
)) {
6816 assign( tmpd
, getIReg(sz
, eregOfRM(rm
)));
6817 assign( tmpt0
, getIReg(sz
, gregOfRM(rm
)) );
6818 assign( tmpt1
, binop(mkSizedOp(ty
,Iop_Add8
),
6819 mkexpr(tmpd
), mkexpr(tmpt0
)) );
6820 setFlags_DEP1_DEP2( Iop_Add8
, tmpd
, tmpt0
, ty
);
6821 putIReg(sz
, eregOfRM(rm
), mkexpr(tmpt1
));
6822 putIReg(sz
, gregOfRM(rm
), mkexpr(tmpd
));
6823 DIP("xadd%c %s, %s\n",
6824 nameISize(sz
), nameIReg(sz
,gregOfRM(rm
)),
6825 nameIReg(sz
,eregOfRM(rm
)));
6829 else if (!epartIsReg(rm
) && !locked
) {
6831 IRTemp addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
6832 assign( tmpd
, loadLE(ty
, mkexpr(addr
)) );
6833 assign( tmpt0
, getIReg(sz
, gregOfRM(rm
)) );
6834 assign( tmpt1
, binop(mkSizedOp(ty
,Iop_Add8
),
6835 mkexpr(tmpd
), mkexpr(tmpt0
)) );
6836 storeLE( mkexpr(addr
), mkexpr(tmpt1
) );
6837 setFlags_DEP1_DEP2( Iop_Add8
, tmpd
, tmpt0
, ty
);
6838 putIReg(sz
, gregOfRM(rm
), mkexpr(tmpd
));
6839 DIP("xadd%c %s, %s\n",
6840 nameISize(sz
), nameIReg(sz
,gregOfRM(rm
)), dis_buf
);
6844 else if (!epartIsReg(rm
) && locked
) {
6846 IRTemp addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
6847 assign( tmpd
, loadLE(ty
, mkexpr(addr
)) );
6848 assign( tmpt0
, getIReg(sz
, gregOfRM(rm
)) );
6849 assign( tmpt1
, binop(mkSizedOp(ty
,Iop_Add8
),
6850 mkexpr(tmpd
), mkexpr(tmpt0
)) );
6851 casLE( mkexpr(addr
), mkexpr(tmpd
)/*expVal*/,
6852 mkexpr(tmpt1
)/*newVal*/, guest_EIP_curr_instr
);
6853 setFlags_DEP1_DEP2( Iop_Add8
, tmpd
, tmpt0
, ty
);
6854 putIReg(sz
, gregOfRM(rm
), mkexpr(tmpd
));
6855 DIP("xadd%c %s, %s\n",
6856 nameISize(sz
), nameIReg(sz
,gregOfRM(rm
)), dis_buf
);
6864 /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
6867 UInt
dis_mov_Ew_Sw ( UChar sorb
, Int delta0
)
6871 UChar rm
= getIByte(delta0
);
6874 if (epartIsReg(rm
)) {
6875 putSReg( gregOfRM(rm
), getIReg(2, eregOfRM(rm
)) );
6876 DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm
)), nameSReg(gregOfRM(rm
)));
6879 addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
6880 putSReg( gregOfRM(rm
), loadLE(Ity_I16
, mkexpr(addr
)) );
6881 DIP("movw %s,%s\n", dis_buf
, nameSReg(gregOfRM(rm
)));
6886 /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
6887 dst is ireg and sz==4, zero out top half of it. */
6890 UInt
dis_mov_Sw_Ew ( UChar sorb
,
6896 UChar rm
= getIByte(delta0
);
6899 vassert(sz
== 2 || sz
== 4);
6901 if (epartIsReg(rm
)) {
6903 putIReg(4, eregOfRM(rm
), unop(Iop_16Uto32
, getSReg(gregOfRM(rm
))));
6905 putIReg(2, eregOfRM(rm
), getSReg(gregOfRM(rm
)));
6907 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm
)), nameIReg(sz
,eregOfRM(rm
)));
6910 addr
= disAMode ( &len
, sorb
, delta0
, dis_buf
);
6911 storeLE( mkexpr(addr
), getSReg(gregOfRM(rm
)) );
6912 DIP("mov %s,%s\n", nameSReg(gregOfRM(rm
)), dis_buf
);
6919 void dis_push_segreg ( UInt sreg
, Int sz
)
6921 IRTemp t1
= newTemp(Ity_I16
);
6922 IRTemp ta
= newTemp(Ity_I32
);
6923 vassert(sz
== 2 || sz
== 4);
6925 assign( t1
, getSReg(sreg
) );
6926 assign( ta
, binop(Iop_Sub32
, getIReg(4, R_ESP
), mkU32(sz
)) );
6927 putIReg(4, R_ESP
, mkexpr(ta
));
6928 storeLE( mkexpr(ta
), mkexpr(t1
) );
6930 DIP("push%c %s\n", sz
==2 ? 'w' : 'l', nameSReg(sreg
));
6934 void dis_pop_segreg ( UInt sreg
, Int sz
)
6936 IRTemp t1
= newTemp(Ity_I16
);
6937 IRTemp ta
= newTemp(Ity_I32
);
6938 vassert(sz
== 2 || sz
== 4);
6940 assign( ta
, getIReg(4, R_ESP
) );
6941 assign( t1
, loadLE(Ity_I16
, mkexpr(ta
)) );
6943 putIReg(4, R_ESP
, binop(Iop_Add32
, mkexpr(ta
), mkU32(sz
)) );
6944 putSReg( sreg
, mkexpr(t1
) );
6945 DIP("pop%c %s\n", sz
==2 ? 'w' : 'l', nameSReg(sreg
));
6949 void dis_ret ( /*MOD*/DisResult
* dres
, UInt d32
)
6951 IRTemp t1
= newTemp(Ity_I32
);
6952 IRTemp t2
= newTemp(Ity_I32
);
6953 assign(t1
, getIReg(4,R_ESP
));
6954 assign(t2
, loadLE(Ity_I32
,mkexpr(t1
)));
6955 putIReg(4, R_ESP
,binop(Iop_Add32
, mkexpr(t1
), mkU32(4+d32
)));
6956 jmp_treg(dres
, Ijk_Ret
, t2
);
6957 vassert(dres
->whatNext
== Dis_StopHere
);
6960 /*------------------------------------------------------------*/
6961 /*--- SSE/SSE2/SSE3 helpers ---*/
6962 /*------------------------------------------------------------*/
6964 /* Indicates whether the op requires a rounding-mode argument. Note
6965 that this covers only vector floating point arithmetic ops, and
6966 omits the scalar ones that need rounding modes. Note also that
6967 inconsistencies here will get picked up later by the IR sanity
6968 checker, so this isn't correctness-critical. */
6969 static Bool
requiresRMode ( IROp op
)
6973 case Iop_Add32Fx4
: case Iop_Sub32Fx4
:
6974 case Iop_Mul32Fx4
: case Iop_Div32Fx4
:
6975 case Iop_Add64Fx2
: case Iop_Sub64Fx2
:
6976 case Iop_Mul64Fx2
: case Iop_Div64Fx2
:
6985 /* Worker function; do not call directly.
6986 Handles full width G = G `op` E and G = (not G) `op` E.
6989 static UInt
dis_SSE_E_to_G_all_wrk (
6990 UChar sorb
, Int delta
,
6991 const HChar
* opname
, IROp op
,
6998 UChar rm
= getIByte(delta
);
7000 = invertG
? unop(Iop_NotV128
, getXMMReg(gregOfRM(rm
)))
7001 : getXMMReg(gregOfRM(rm
));
7002 if (epartIsReg(rm
)) {
7006 ? triop(op
, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
7008 getXMMReg(eregOfRM(rm
)))
7010 getXMMReg(eregOfRM(rm
)))
7012 DIP("%s %s,%s\n", opname
,
7013 nameXMMReg(eregOfRM(rm
)),
7014 nameXMMReg(gregOfRM(rm
)) );
7017 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7021 ? triop(op
, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
7023 loadLE(Ity_V128
, mkexpr(addr
)))
7025 loadLE(Ity_V128
, mkexpr(addr
)))
7027 DIP("%s %s,%s\n", opname
,
7029 nameXMMReg(gregOfRM(rm
)) );
7035 /* All lanes SSE binary operation, G = G `op` E. */
7038 UInt
dis_SSE_E_to_G_all ( UChar sorb
, Int delta
, const HChar
* opname
, IROp op
)
7040 return dis_SSE_E_to_G_all_wrk( sorb
, delta
, opname
, op
, False
);
7043 /* All lanes SSE binary operation, G = (not G) `op` E. */
7046 UInt
dis_SSE_E_to_G_all_invG ( UChar sorb
, Int delta
,
7047 const HChar
* opname
, IROp op
)
7049 return dis_SSE_E_to_G_all_wrk( sorb
, delta
, opname
, op
, True
);
7053 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
7055 static UInt
dis_SSE_E_to_G_lo32 ( UChar sorb
, Int delta
,
7056 const HChar
* opname
, IROp op
)
7061 UChar rm
= getIByte(delta
);
7062 IRExpr
* gpart
= getXMMReg(gregOfRM(rm
));
7063 if (epartIsReg(rm
)) {
7064 putXMMReg( gregOfRM(rm
),
7066 getXMMReg(eregOfRM(rm
))) );
7067 DIP("%s %s,%s\n", opname
,
7068 nameXMMReg(eregOfRM(rm
)),
7069 nameXMMReg(gregOfRM(rm
)) );
7072 /* We can only do a 32-bit memory read, so the upper 3/4 of the
7073 E operand needs to be made simply of zeroes. */
7074 IRTemp epart
= newTemp(Ity_V128
);
7075 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7076 assign( epart
, unop( Iop_32UtoV128
,
7077 loadLE(Ity_I32
, mkexpr(addr
))) );
7078 putXMMReg( gregOfRM(rm
),
7079 binop(op
, gpart
, mkexpr(epart
)) );
7080 DIP("%s %s,%s\n", opname
,
7082 nameXMMReg(gregOfRM(rm
)) );
7088 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
7090 static UInt
dis_SSE_E_to_G_lo64 ( UChar sorb
, Int delta
,
7091 const HChar
* opname
, IROp op
)
7096 UChar rm
= getIByte(delta
);
7097 IRExpr
* gpart
= getXMMReg(gregOfRM(rm
));
7098 if (epartIsReg(rm
)) {
7099 putXMMReg( gregOfRM(rm
),
7101 getXMMReg(eregOfRM(rm
))) );
7102 DIP("%s %s,%s\n", opname
,
7103 nameXMMReg(eregOfRM(rm
)),
7104 nameXMMReg(gregOfRM(rm
)) );
7107 /* We can only do a 64-bit memory read, so the upper half of the
7108 E operand needs to be made simply of zeroes. */
7109 IRTemp epart
= newTemp(Ity_V128
);
7110 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7111 assign( epart
, unop( Iop_64UtoV128
,
7112 loadLE(Ity_I64
, mkexpr(addr
))) );
7113 putXMMReg( gregOfRM(rm
),
7114 binop(op
, gpart
, mkexpr(epart
)) );
7115 DIP("%s %s,%s\n", opname
,
7117 nameXMMReg(gregOfRM(rm
)) );
7123 /* All lanes unary SSE operation, G = op(E). */
7125 static UInt
dis_SSE_E_to_G_unary_all (
7126 UChar sorb
, Int delta
,
7127 const HChar
* opname
, IROp op
7133 UChar rm
= getIByte(delta
);
7134 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
7135 // up in the usual way.
7136 Bool needsIRRM
= op
== Iop_Sqrt32Fx4
|| op
== Iop_Sqrt64Fx2
;
7137 if (epartIsReg(rm
)) {
7138 IRExpr
* src
= getXMMReg(eregOfRM(rm
));
7139 /* XXXROUNDINGFIXME */
7140 IRExpr
* res
= needsIRRM
? binop(op
, get_FAKE_roundingmode(), src
)
7142 putXMMReg( gregOfRM(rm
), res
);
7143 DIP("%s %s,%s\n", opname
,
7144 nameXMMReg(eregOfRM(rm
)),
7145 nameXMMReg(gregOfRM(rm
)) );
7148 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7149 IRExpr
* src
= loadLE(Ity_V128
, mkexpr(addr
));
7150 /* XXXROUNDINGFIXME */
7151 IRExpr
* res
= needsIRRM
? binop(op
, get_FAKE_roundingmode(), src
)
7153 putXMMReg( gregOfRM(rm
), res
);
7154 DIP("%s %s,%s\n", opname
,
7156 nameXMMReg(gregOfRM(rm
)) );
7162 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */
7164 static UInt
dis_SSE_E_to_G_unary_lo32 (
7165 UChar sorb
, Int delta
,
7166 const HChar
* opname
, IROp op
7169 /* First we need to get the old G value and patch the low 32 bits
7170 of the E operand into it. Then apply op and write back to G. */
7174 UChar rm
= getIByte(delta
);
7175 IRTemp oldG0
= newTemp(Ity_V128
);
7176 IRTemp oldG1
= newTemp(Ity_V128
);
7178 assign( oldG0
, getXMMReg(gregOfRM(rm
)) );
7180 if (epartIsReg(rm
)) {
7182 binop( Iop_SetV128lo32
,
7184 getXMMRegLane32(eregOfRM(rm
), 0)) );
7185 putXMMReg( gregOfRM(rm
), unop(op
, mkexpr(oldG1
)) );
7186 DIP("%s %s,%s\n", opname
,
7187 nameXMMReg(eregOfRM(rm
)),
7188 nameXMMReg(gregOfRM(rm
)) );
7191 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7193 binop( Iop_SetV128lo32
,
7195 loadLE(Ity_I32
, mkexpr(addr
)) ));
7196 putXMMReg( gregOfRM(rm
), unop(op
, mkexpr(oldG1
)) );
7197 DIP("%s %s,%s\n", opname
,
7199 nameXMMReg(gregOfRM(rm
)) );
7205 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */
7207 static UInt
dis_SSE_E_to_G_unary_lo64 (
7208 UChar sorb
, Int delta
,
7209 const HChar
* opname
, IROp op
7212 /* First we need to get the old G value and patch the low 64 bits
7213 of the E operand into it. Then apply op and write back to G. */
7217 UChar rm
= getIByte(delta
);
7218 IRTemp oldG0
= newTemp(Ity_V128
);
7219 IRTemp oldG1
= newTemp(Ity_V128
);
7221 assign( oldG0
, getXMMReg(gregOfRM(rm
)) );
7223 if (epartIsReg(rm
)) {
7225 binop( Iop_SetV128lo64
,
7227 getXMMRegLane64(eregOfRM(rm
), 0)) );
7228 putXMMReg( gregOfRM(rm
), unop(op
, mkexpr(oldG1
)) );
7229 DIP("%s %s,%s\n", opname
,
7230 nameXMMReg(eregOfRM(rm
)),
7231 nameXMMReg(gregOfRM(rm
)) );
7234 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7236 binop( Iop_SetV128lo64
,
7238 loadLE(Ity_I64
, mkexpr(addr
)) ));
7239 putXMMReg( gregOfRM(rm
), unop(op
, mkexpr(oldG1
)) );
7240 DIP("%s %s,%s\n", opname
,
7242 nameXMMReg(gregOfRM(rm
)) );
7248 /* SSE integer binary operation:
7249 G = G `op` E (eLeft == False)
7250 G = E `op` G (eLeft == True)
7252 static UInt
dis_SSEint_E_to_G(
7253 UChar sorb
, Int delta
,
7254 const HChar
* opname
, IROp op
,
7261 UChar rm
= getIByte(delta
);
7262 IRExpr
* gpart
= getXMMReg(gregOfRM(rm
));
7263 IRExpr
* epart
= NULL
;
7264 if (epartIsReg(rm
)) {
7265 epart
= getXMMReg(eregOfRM(rm
));
7266 DIP("%s %s,%s\n", opname
,
7267 nameXMMReg(eregOfRM(rm
)),
7268 nameXMMReg(gregOfRM(rm
)) );
7271 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7272 epart
= loadLE(Ity_V128
, mkexpr(addr
));
7273 DIP("%s %s,%s\n", opname
,
7275 nameXMMReg(gregOfRM(rm
)) );
7278 putXMMReg( gregOfRM(rm
),
7279 eLeft
? binop(op
, epart
, gpart
)
7280 : binop(op
, gpart
, epart
) );
7285 /* Helper for doing SSE FP comparisons. */
7287 static void findSSECmpOp ( Bool
* needNot
, IROp
* op
,
7288 Int imm8
, Bool all_lanes
, Int sz
)
7298 if (sz
== 4 && all_lanes
) {
7300 case 0: *op
= Iop_CmpEQ32Fx4
; return;
7301 case 1: *op
= Iop_CmpLT32Fx4
; return;
7302 case 2: *op
= Iop_CmpLE32Fx4
; return;
7303 case 3: *op
= Iop_CmpUN32Fx4
; return;
7307 if (sz
== 4 && !all_lanes
) {
7309 case 0: *op
= Iop_CmpEQ32F0x4
; return;
7310 case 1: *op
= Iop_CmpLT32F0x4
; return;
7311 case 2: *op
= Iop_CmpLE32F0x4
; return;
7312 case 3: *op
= Iop_CmpUN32F0x4
; return;
7316 if (sz
== 8 && all_lanes
) {
7318 case 0: *op
= Iop_CmpEQ64Fx2
; return;
7319 case 1: *op
= Iop_CmpLT64Fx2
; return;
7320 case 2: *op
= Iop_CmpLE64Fx2
; return;
7321 case 3: *op
= Iop_CmpUN64Fx2
; return;
7325 if (sz
== 8 && !all_lanes
) {
7327 case 0: *op
= Iop_CmpEQ64F0x2
; return;
7328 case 1: *op
= Iop_CmpLT64F0x2
; return;
7329 case 2: *op
= Iop_CmpLE64F0x2
; return;
7330 case 3: *op
= Iop_CmpUN64F0x2
; return;
7334 vpanic("findSSECmpOp(x86,guest)");
7337 /* Handles SSE 32F/64F comparisons. */
7339 static UInt
dis_SSEcmp_E_to_G ( UChar sorb
, Int delta
,
7340 const HChar
* opname
, Bool all_lanes
, Int sz
)
7345 Bool needNot
= False
;
7346 IROp op
= Iop_INVALID
;
7347 IRTemp plain
= newTemp(Ity_V128
);
7348 UChar rm
= getIByte(delta
);
7350 vassert(sz
== 4 || sz
== 8);
7351 if (epartIsReg(rm
)) {
7352 imm8
= getIByte(delta
+1);
7353 findSSECmpOp(&needNot
, &op
, imm8
, all_lanes
, sz
);
7354 assign( plain
, binop(op
, getXMMReg(gregOfRM(rm
)),
7355 getXMMReg(eregOfRM(rm
))) );
7357 DIP("%s $%d,%s,%s\n", opname
,
7359 nameXMMReg(eregOfRM(rm
)),
7360 nameXMMReg(gregOfRM(rm
)) );
7362 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7363 imm8
= getIByte(delta
+alen
);
7364 findSSECmpOp(&needNot
, &op
, imm8
, all_lanes
, sz
);
7368 getXMMReg(gregOfRM(rm
)),
7369 all_lanes
? loadLE(Ity_V128
, mkexpr(addr
))
7370 : sz
== 8 ? unop( Iop_64UtoV128
, loadLE(Ity_I64
, mkexpr(addr
)))
7371 : /*sz==4*/ unop( Iop_32UtoV128
, loadLE(Ity_I32
, mkexpr(addr
)))
7375 DIP("%s $%d,%s,%s\n", opname
,
7378 nameXMMReg(gregOfRM(rm
)) );
7381 if (needNot
&& all_lanes
) {
7382 putXMMReg( gregOfRM(rm
),
7383 unop(Iop_NotV128
, mkexpr(plain
)) );
7386 if (needNot
&& !all_lanes
) {
7387 mask
= toUShort( sz
==4 ? 0x000F : 0x00FF );
7388 putXMMReg( gregOfRM(rm
),
7389 binop(Iop_XorV128
, mkexpr(plain
), mkV128(mask
)) );
7392 putXMMReg( gregOfRM(rm
), mkexpr(plain
) );
7399 /* Vector by scalar shift of G by the amount specified at the bottom
7402 static UInt
dis_SSE_shiftG_byE ( UChar sorb
, Int delta
,
7403 const HChar
* opname
, IROp op
)
7409 UChar rm
= getIByte(delta
);
7410 IRTemp g0
= newTemp(Ity_V128
);
7411 IRTemp g1
= newTemp(Ity_V128
);
7412 IRTemp amt
= newTemp(Ity_I32
);
7413 IRTemp amt8
= newTemp(Ity_I8
);
7414 if (epartIsReg(rm
)) {
7415 assign( amt
, getXMMRegLane32(eregOfRM(rm
), 0) );
7416 DIP("%s %s,%s\n", opname
,
7417 nameXMMReg(eregOfRM(rm
)),
7418 nameXMMReg(gregOfRM(rm
)) );
7421 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
7422 assign( amt
, loadLE(Ity_I32
, mkexpr(addr
)) );
7423 DIP("%s %s,%s\n", opname
,
7425 nameXMMReg(gregOfRM(rm
)) );
7428 assign( g0
, getXMMReg(gregOfRM(rm
)) );
7429 assign( amt8
, unop(Iop_32to8
, mkexpr(amt
)) );
7431 shl
= shr
= sar
= False
;
7434 case Iop_ShlN16x8
: shl
= True
; size
= 32; break;
7435 case Iop_ShlN32x4
: shl
= True
; size
= 32; break;
7436 case Iop_ShlN64x2
: shl
= True
; size
= 64; break;
7437 case Iop_SarN16x8
: sar
= True
; size
= 16; break;
7438 case Iop_SarN32x4
: sar
= True
; size
= 32; break;
7439 case Iop_ShrN16x8
: shr
= True
; size
= 16; break;
7440 case Iop_ShrN32x4
: shr
= True
; size
= 32; break;
7441 case Iop_ShrN64x2
: shr
= True
; size
= 64; break;
7442 default: vassert(0);
7449 binop(Iop_CmpLT32U
,mkexpr(amt
),mkU32(size
)),
7450 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
7459 binop(Iop_CmpLT32U
,mkexpr(amt
),mkU32(size
)),
7460 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
7461 binop(op
, mkexpr(g0
), mkU8(size
-1))
7469 putXMMReg( gregOfRM(rm
), mkexpr(g1
) );
7474 /* Vector by scalar shift of E by an immediate byte. */
7477 UInt
dis_SSE_shiftE_imm ( Int delta
, const HChar
* opname
, IROp op
)
7480 UChar rm
= getIByte(delta
);
7481 IRTemp e0
= newTemp(Ity_V128
);
7482 IRTemp e1
= newTemp(Ity_V128
);
7484 vassert(epartIsReg(rm
));
7485 vassert(gregOfRM(rm
) == 2
7486 || gregOfRM(rm
) == 4 || gregOfRM(rm
) == 6);
7487 amt
= getIByte(delta
+1);
7489 DIP("%s $%d,%s\n", opname
,
7491 nameXMMReg(eregOfRM(rm
)) );
7492 assign( e0
, getXMMReg(eregOfRM(rm
)) );
7494 shl
= shr
= sar
= False
;
7497 case Iop_ShlN16x8
: shl
= True
; size
= 16; break;
7498 case Iop_ShlN32x4
: shl
= True
; size
= 32; break;
7499 case Iop_ShlN64x2
: shl
= True
; size
= 64; break;
7500 case Iop_SarN16x8
: sar
= True
; size
= 16; break;
7501 case Iop_SarN32x4
: sar
= True
; size
= 32; break;
7502 case Iop_ShrN16x8
: shr
= True
; size
= 16; break;
7503 case Iop_ShrN32x4
: shr
= True
; size
= 32; break;
7504 case Iop_ShrN64x2
: shr
= True
; size
= 64; break;
7505 default: vassert(0);
7509 assign( e1
, amt
>= size
7511 : binop(op
, mkexpr(e0
), mkU8(amt
))
7515 assign( e1
, amt
>= size
7516 ? binop(op
, mkexpr(e0
), mkU8(size
-1))
7517 : binop(op
, mkexpr(e0
), mkU8(amt
))
7524 putXMMReg( eregOfRM(rm
), mkexpr(e1
) );
7529 /* Get the current SSE rounding mode. */
7531 static IRExpr
* /* :: Ity_I32 */ get_sse_roundingmode ( void )
7533 return binop( Iop_And32
,
7534 IRExpr_Get( OFFB_SSEROUND
, Ity_I32
),
7538 static void put_sse_roundingmode ( IRExpr
* sseround
)
7540 vassert(typeOfIRExpr(irsb
->tyenv
, sseround
) == Ity_I32
);
7541 stmt( IRStmt_Put( OFFB_SSEROUND
, sseround
) );
7544 /* Break a 128-bit value up into four 32-bit ints. */
7546 static void breakup128to32s ( IRTemp t128
,
7548 IRTemp
* t3
, IRTemp
* t2
,
7549 IRTemp
* t1
, IRTemp
* t0
)
7551 IRTemp hi64
= newTemp(Ity_I64
);
7552 IRTemp lo64
= newTemp(Ity_I64
);
7553 assign( hi64
, unop(Iop_V128HIto64
, mkexpr(t128
)) );
7554 assign( lo64
, unop(Iop_V128to64
, mkexpr(t128
)) );
7556 vassert(t0
&& *t0
== IRTemp_INVALID
);
7557 vassert(t1
&& *t1
== IRTemp_INVALID
);
7558 vassert(t2
&& *t2
== IRTemp_INVALID
);
7559 vassert(t3
&& *t3
== IRTemp_INVALID
);
7561 *t0
= newTemp(Ity_I32
);
7562 *t1
= newTemp(Ity_I32
);
7563 *t2
= newTemp(Ity_I32
);
7564 *t3
= newTemp(Ity_I32
);
7565 assign( *t0
, unop(Iop_64to32
, mkexpr(lo64
)) );
7566 assign( *t1
, unop(Iop_64HIto32
, mkexpr(lo64
)) );
7567 assign( *t2
, unop(Iop_64to32
, mkexpr(hi64
)) );
7568 assign( *t3
, unop(Iop_64HIto32
, mkexpr(hi64
)) );
7571 /* Construct a 128-bit value from four 32-bit ints. */
7573 static IRExpr
* mk128from32s ( IRTemp t3
, IRTemp t2
,
7574 IRTemp t1
, IRTemp t0
)
7577 binop( Iop_64HLtoV128
,
7578 binop(Iop_32HLto64
, mkexpr(t3
), mkexpr(t2
)),
7579 binop(Iop_32HLto64
, mkexpr(t1
), mkexpr(t0
))
7583 /* Break a 64-bit value up into four 16-bit ints. */
7585 static void breakup64to16s ( IRTemp t64
,
7587 IRTemp
* t3
, IRTemp
* t2
,
7588 IRTemp
* t1
, IRTemp
* t0
)
7590 IRTemp hi32
= newTemp(Ity_I32
);
7591 IRTemp lo32
= newTemp(Ity_I32
);
7592 assign( hi32
, unop(Iop_64HIto32
, mkexpr(t64
)) );
7593 assign( lo32
, unop(Iop_64to32
, mkexpr(t64
)) );
7595 vassert(t0
&& *t0
== IRTemp_INVALID
);
7596 vassert(t1
&& *t1
== IRTemp_INVALID
);
7597 vassert(t2
&& *t2
== IRTemp_INVALID
);
7598 vassert(t3
&& *t3
== IRTemp_INVALID
);
7600 *t0
= newTemp(Ity_I16
);
7601 *t1
= newTemp(Ity_I16
);
7602 *t2
= newTemp(Ity_I16
);
7603 *t3
= newTemp(Ity_I16
);
7604 assign( *t0
, unop(Iop_32to16
, mkexpr(lo32
)) );
7605 assign( *t1
, unop(Iop_32HIto16
, mkexpr(lo32
)) );
7606 assign( *t2
, unop(Iop_32to16
, mkexpr(hi32
)) );
7607 assign( *t3
, unop(Iop_32HIto16
, mkexpr(hi32
)) );
7610 /* Construct a 64-bit value from four 16-bit ints. */
7612 static IRExpr
* mk64from16s ( IRTemp t3
, IRTemp t2
,
7613 IRTemp t1
, IRTemp t0
)
7616 binop( Iop_32HLto64
,
7617 binop(Iop_16HLto32
, mkexpr(t3
), mkexpr(t2
)),
7618 binop(Iop_16HLto32
, mkexpr(t1
), mkexpr(t0
))
7622 /* Generate IR to set the guest %EFLAGS from the pushfl-format image
7623 in the given 32-bit temporary. The flags that are set are: O S Z A
7626 In all cases, code to set AC is generated. However, VEX actually
7627 ignores the AC value and so can optionally emit an emulation
7628 warning when it is enabled. In this routine, an emulation warning
7629 is only emitted if emit_AC_emwarn is True, in which case
7630 next_insn_EIP must be correct (this allows for correct code
7631 generation for popfl/popfw). If emit_AC_emwarn is False,
7632 next_insn_EIP is unimportant (this allows for easy if kludgey code
7633 generation for IRET.) */
7636 void set_EFLAGS_from_value ( IRTemp t1
,
7637 Bool emit_AC_emwarn
,
7638 Addr32 next_insn_EIP
)
7640 vassert(typeOfIRTemp(irsb
->tyenv
,t1
) == Ity_I32
);
7642 /* t1 is the flag word. Mask out everything except OSZACP and set
7643 the flags thunk to X86G_CC_OP_COPY. */
7644 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
7645 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
7646 stmt( IRStmt_Put( OFFB_CC_DEP1
,
7649 mkU32( X86G_CC_MASK_C
| X86G_CC_MASK_P
7650 | X86G_CC_MASK_A
| X86G_CC_MASK_Z
7651 | X86G_CC_MASK_S
| X86G_CC_MASK_O
)
7655 /* Set NDEP even though it isn't used. This makes redundant-PUT
7656 elimination of previous stores to this field work better. */
7657 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
7659 /* Also need to set the D flag, which is held in bit 10 of t1.
7660 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
7666 binop(Iop_Shr32
, mkexpr(t1
), mkU8(10)),
7672 /* Set the ID flag */
7678 binop(Iop_Shr32
, mkexpr(t1
), mkU8(21)),
7684 /* And set the AC flag. If setting it 1 to, possibly emit an
7685 emulation warning. */
7691 binop(Iop_Shr32
, mkexpr(t1
), mkU8(18)),
7697 if (emit_AC_emwarn
) {
7698 put_emwarn( mkU32(EmWarn_X86_acFlag
) );
7702 binop(Iop_And32
, mkexpr(t1
), mkU32(1<<18)),
7705 IRConst_U32( next_insn_EIP
),
7713 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
7714 values (aa,bb), computes, for each of the 4 16-bit lanes:
7716 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
7718 static IRExpr
* dis_PMULHRSW_helper ( IRExpr
* aax
, IRExpr
* bbx
)
7720 IRTemp aa
= newTemp(Ity_I64
);
7721 IRTemp bb
= newTemp(Ity_I64
);
7722 IRTemp aahi32s
= newTemp(Ity_I64
);
7723 IRTemp aalo32s
= newTemp(Ity_I64
);
7724 IRTemp bbhi32s
= newTemp(Ity_I64
);
7725 IRTemp bblo32s
= newTemp(Ity_I64
);
7726 IRTemp rHi
= newTemp(Ity_I64
);
7727 IRTemp rLo
= newTemp(Ity_I64
);
7728 IRTemp one32x2
= newTemp(Ity_I64
);
7733 binop(Iop_InterleaveHI16x4
, mkexpr(aa
), mkexpr(aa
)),
7737 binop(Iop_InterleaveLO16x4
, mkexpr(aa
), mkexpr(aa
)),
7741 binop(Iop_InterleaveHI16x4
, mkexpr(bb
), mkexpr(bb
)),
7745 binop(Iop_InterleaveLO16x4
, mkexpr(bb
), mkexpr(bb
)),
7747 assign(one32x2
, mkU64( (1ULL << 32) + 1 ));
7756 binop(Iop_Mul32x2
, mkexpr(aahi32s
), mkexpr(bbhi32s
)),
7772 binop(Iop_Mul32x2
, mkexpr(aalo32s
), mkexpr(bblo32s
)),
7781 binop(Iop_CatEvenLanes16x4
, mkexpr(rHi
), mkexpr(rLo
));
7784 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
7785 values (aa,bb), computes, for each lane:
7787 if aa_lane < 0 then - bb_lane
7788 else if aa_lane > 0 then bb_lane
7791 static IRExpr
* dis_PSIGN_helper ( IRExpr
* aax
, IRExpr
* bbx
, Int laneszB
)
7793 IRTemp aa
= newTemp(Ity_I64
);
7794 IRTemp bb
= newTemp(Ity_I64
);
7795 IRTemp zero
= newTemp(Ity_I64
);
7796 IRTemp bbNeg
= newTemp(Ity_I64
);
7797 IRTemp negMask
= newTemp(Ity_I64
);
7798 IRTemp posMask
= newTemp(Ity_I64
);
7799 IROp opSub
= Iop_INVALID
;
7800 IROp opCmpGTS
= Iop_INVALID
;
7803 case 1: opSub
= Iop_Sub8x8
; opCmpGTS
= Iop_CmpGT8Sx8
; break;
7804 case 2: opSub
= Iop_Sub16x4
; opCmpGTS
= Iop_CmpGT16Sx4
; break;
7805 case 4: opSub
= Iop_Sub32x2
; opCmpGTS
= Iop_CmpGT32Sx2
; break;
7806 default: vassert(0);
7811 assign( zero
, mkU64(0) );
7812 assign( bbNeg
, binop(opSub
, mkexpr(zero
), mkexpr(bb
)) );
7813 assign( negMask
, binop(opCmpGTS
, mkexpr(zero
), mkexpr(aa
)) );
7814 assign( posMask
, binop(opCmpGTS
, mkexpr(aa
), mkexpr(zero
)) );
7818 binop(Iop_And64
, mkexpr(bb
), mkexpr(posMask
)),
7819 binop(Iop_And64
, mkexpr(bbNeg
), mkexpr(negMask
)) );
7823 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
7824 value aa, computes, for each lane
7826 if aa < 0 then -aa else aa
7828 Note that the result is interpreted as unsigned, so that the
7829 absolute value of the most negative signed input can be
7832 static IRExpr
* dis_PABS_helper ( IRExpr
* aax
, Int laneszB
)
7834 IRTemp aa
= newTemp(Ity_I64
);
7835 IRTemp zero
= newTemp(Ity_I64
);
7836 IRTemp aaNeg
= newTemp(Ity_I64
);
7837 IRTemp negMask
= newTemp(Ity_I64
);
7838 IRTemp posMask
= newTemp(Ity_I64
);
7839 IROp opSub
= Iop_INVALID
;
7840 IROp opSarN
= Iop_INVALID
;
7843 case 1: opSub
= Iop_Sub8x8
; opSarN
= Iop_SarN8x8
; break;
7844 case 2: opSub
= Iop_Sub16x4
; opSarN
= Iop_SarN16x4
; break;
7845 case 4: opSub
= Iop_Sub32x2
; opSarN
= Iop_SarN32x2
; break;
7846 default: vassert(0);
7850 assign( negMask
, binop(opSarN
, mkexpr(aa
), mkU8(8*laneszB
-1)) );
7851 assign( posMask
, unop(Iop_Not64
, mkexpr(negMask
)) );
7852 assign( zero
, mkU64(0) );
7853 assign( aaNeg
, binop(opSub
, mkexpr(zero
), mkexpr(aa
)) );
7856 binop(Iop_And64
, mkexpr(aa
), mkexpr(posMask
)),
7857 binop(Iop_And64
, mkexpr(aaNeg
), mkexpr(negMask
)) );
7860 static IRExpr
* dis_PALIGNR_XMM_helper ( IRTemp hi64
,
7861 IRTemp lo64
, Int byteShift
)
7863 vassert(byteShift
>= 1 && byteShift
<= 7);
7866 binop(Iop_Shl64
, mkexpr(hi64
), mkU8(8*(8-byteShift
))),
7867 binop(Iop_Shr64
, mkexpr(lo64
), mkU8(8*byteShift
))
7871 /* Generate a SIGSEGV followed by a restart of the current instruction
7872 if effective_addr is not 16-aligned. This is required behaviour
7873 for some SSE3 instructions and all 128-bit SSSE3 instructions.
7874 This assumes that guest_RIP_curr_instr is set correctly! */
7875 static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr
)
7880 binop(Iop_And32
,mkexpr(effective_addr
),mkU32(0xF)),
7883 IRConst_U32(guest_EIP_curr_instr
),
7890 /* Helper for deciding whether a given insn (starting at the opcode
7891 byte) may validly be used with a LOCK prefix. The following insns
7892 may be used with LOCK when their destination operand is in memory.
7893 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
7895 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
7896 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
7897 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
7898 SBB 81 /3, 81 /3, 82 /x, 83 /3, 18, 19
7899 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
7900 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
7901 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
7915 CMPXCHG 0F B0, 0F B1
7920 ------------------------------
7922 80 /0 = addb $imm8, rm8
7923 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
7924 82 /0 = addb $imm8, rm8
7925 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
7928 01 = addl r32, rm32 and addw r16, rm16
7930 Same for ADD OR ADC SBB AND SUB XOR
7933 FF /1 = dec rm32 and dec rm16
7936 FF /0 = inc rm32 and inc rm16
7939 F7 /3 = neg rm32 and neg rm16
7942 F7 /2 = not rm32 and not rm16
7944 0F BB = btcw r16, rm16 and btcl r32, rm32
7945 OF BA /7 = btcw $imm8, rm16 and btcw $imm8, rm32
7949 static Bool
can_be_used_with_LOCK_prefix ( const UChar
* opc
)
7952 case 0x00: case 0x01: case 0x08: case 0x09:
7953 case 0x10: case 0x11: case 0x18: case 0x19:
7954 case 0x20: case 0x21: case 0x28: case 0x29:
7955 case 0x30: case 0x31:
7956 if (!epartIsReg(opc
[1]))
7960 case 0x80: case 0x81: case 0x82: case 0x83:
7961 if (gregOfRM(opc
[1]) >= 0 && gregOfRM(opc
[1]) <= 6
7962 && !epartIsReg(opc
[1]))
7966 case 0xFE: case 0xFF:
7967 if (gregOfRM(opc
[1]) >= 0 && gregOfRM(opc
[1]) <= 1
7968 && !epartIsReg(opc
[1]))
7972 case 0xF6: case 0xF7:
7973 if (gregOfRM(opc
[1]) >= 2 && gregOfRM(opc
[1]) <= 3
7974 && !epartIsReg(opc
[1]))
7978 case 0x86: case 0x87:
7979 if (!epartIsReg(opc
[1]))
7985 case 0xBB: case 0xB3: case 0xAB:
7986 if (!epartIsReg(opc
[2]))
7990 if (gregOfRM(opc
[2]) >= 5 && gregOfRM(opc
[2]) <= 7
7991 && !epartIsReg(opc
[2]))
7994 case 0xB0: case 0xB1:
7995 if (!epartIsReg(opc
[2]))
7999 if (gregOfRM(opc
[2]) == 1 && !epartIsReg(opc
[2]) )
8002 case 0xC0: case 0xC1:
8003 if (!epartIsReg(opc
[2]))
8008 } /* switch (opc[1]) */
8014 } /* switch (opc[0]) */
8019 static IRTemp
math_BSWAP ( IRTemp t1
, IRType ty
)
8021 IRTemp t2
= newTemp(ty
);
8022 if (ty
== Ity_I32
) {
8026 binop(Iop_Shl32
, mkexpr(t1
), mkU8(24)),
8029 binop(Iop_And32
, binop(Iop_Shl32
, mkexpr(t1
), mkU8(8)),
8032 binop(Iop_And32
, binop(Iop_Shr32
, mkexpr(t1
), mkU8(8)),
8034 binop(Iop_And32
, binop(Iop_Shr32
, mkexpr(t1
), mkU8(24)),
8040 if (ty
== Ity_I16
) {
8043 binop(Iop_Shl16
, mkexpr(t1
), mkU8(8)),
8044 binop(Iop_Shr16
, mkexpr(t1
), mkU8(8)) ));
8049 return IRTemp_INVALID
;
8052 /*------------------------------------------------------------*/
8053 /*--- Disassemble a single instruction ---*/
8054 /*------------------------------------------------------------*/
8056 /* Disassemble a single instruction into IR. The instruction is
8057 located in host memory at &guest_code[delta]. *expect_CAS is set
8058 to True if the resulting IR is expected to contain an IRCAS
8059 statement, and False if it's not expected to. This makes it
8060 possible for the caller of disInstr_X86_WRK to check that
8061 LOCK-prefixed instructions are at least plausibly translated, in
8062 that it becomes possible to check that a (validly) LOCK-prefixed
8063 instruction generates a translation containing an IRCAS, and
8064 instructions without LOCK prefixes don't generate translations
8065 containing an IRCAS.
8068 DisResult
disInstr_X86_WRK (
8069 /*OUT*/Bool
* expect_CAS
,
8071 const VexArchInfo
* archinfo
,
8072 const VexAbiInfo
* vbi
,
8077 IRTemp addr
, t0
, t1
, t2
, t3
, t4
, t5
, t6
;
8079 UChar opc
, modrm
, abyte
, pre
;
8082 Int am_sz
, d_sz
, n_prefixes
;
8084 const UChar
* insn
; /* used in SSE decoders */
8086 /* The running delta */
8087 Int delta
= (Int
)delta64
;
8089 /* Holds eip at the start of the insn, so that we can print
8090 consistent error messages for unimplemented insns. */
8091 Int delta_start
= delta
;
8093 /* sz denotes the nominal data-op size of the insn; we change it to
8094 2 if an 0x66 prefix is seen */
8097 /* sorb holds the segment-override-prefix byte, if any. Zero if no
8098 prefix has been seen, else one of {0x26, 0x36, 0x3E, 0x64, 0x65}
8099 indicating the prefix. */
8102 /* Gets set to True if a LOCK prefix is seen. */
8103 Bool pfx_lock
= False
;
8105 /* Set result defaults. */
8106 dres
.whatNext
= Dis_Continue
;
8108 dres
.hint
= Dis_HintNone
;
8109 dres
.jk_StopHere
= Ijk_INVALID
;
8111 *expect_CAS
= False
;
8113 addr
= t0
= t1
= t2
= t3
= t4
= t5
= t6
= IRTemp_INVALID
;
8115 vassert(guest_EIP_bbstart
+ delta
== guest_EIP_curr_instr
);
8116 DIP("\t0x%x: ", guest_EIP_bbstart
+delta
);
8118 /* Spot "Special" instructions (see comment at top of file). */
8120 const UChar
* code
= guest_code
+ delta
;
8121 /* Spot the 12-byte preamble:
8122 C1C703 roll $3, %edi
8123 C1C70D roll $13, %edi
8124 C1C71D roll $29, %edi
8125 C1C713 roll $19, %edi
8127 if (code
[ 0] == 0xC1 && code
[ 1] == 0xC7 && code
[ 2] == 0x03 &&
8128 code
[ 3] == 0xC1 && code
[ 4] == 0xC7 && code
[ 5] == 0x0D &&
8129 code
[ 6] == 0xC1 && code
[ 7] == 0xC7 && code
[ 8] == 0x1D &&
8130 code
[ 9] == 0xC1 && code
[10] == 0xC7 && code
[11] == 0x13) {
8131 /* Got a "Special" instruction preamble. Which one is it? */
8132 if (code
[12] == 0x87 && code
[13] == 0xDB /* xchgl %ebx,%ebx */) {
8133 /* %EDX = client_request ( %EAX ) */
8134 DIP("%%edx = client_request ( %%eax )\n");
8136 jmp_lit(&dres
, Ijk_ClientReq
, guest_EIP_bbstart
+delta
);
8137 vassert(dres
.whatNext
== Dis_StopHere
);
8138 goto decode_success
;
8141 if (code
[12] == 0x87 && code
[13] == 0xC9 /* xchgl %ecx,%ecx */) {
8142 /* %EAX = guest_NRADDR */
8143 DIP("%%eax = guest_NRADDR\n");
8145 putIReg(4, R_EAX
, IRExpr_Get( OFFB_NRADDR
, Ity_I32
));
8146 goto decode_success
;
8149 if (code
[12] == 0x87 && code
[13] == 0xD2 /* xchgl %edx,%edx */) {
8150 /* call-noredir *%EAX */
8151 DIP("call-noredir *%%eax\n");
8153 t1
= newTemp(Ity_I32
);
8154 assign(t1
, getIReg(4,R_EAX
));
8155 t2
= newTemp(Ity_I32
);
8156 assign(t2
, binop(Iop_Sub32
, getIReg(4,R_ESP
), mkU32(4)));
8157 putIReg(4, R_ESP
, mkexpr(t2
));
8158 storeLE( mkexpr(t2
), mkU32(guest_EIP_bbstart
+delta
));
8159 jmp_treg(&dres
, Ijk_NoRedir
, t1
);
8160 vassert(dres
.whatNext
== Dis_StopHere
);
8161 goto decode_success
;
8164 if (code
[12] == 0x87 && code
[13] == 0xFF /* xchgl %edi,%edi */) {
8166 DIP("IR injection\n");
8167 vex_inject_ir(irsb
, Iend_LE
);
8169 // Invalidate the current insn. The reason is that the IRop we're
8170 // injecting here can change. In which case the translation has to
8171 // be redone. For ease of handling, we simply invalidate all the
8173 stmt(IRStmt_Put(OFFB_CMSTART
, mkU32(guest_EIP_curr_instr
)));
8174 stmt(IRStmt_Put(OFFB_CMLEN
, mkU32(14)));
8178 stmt( IRStmt_Put( OFFB_EIP
, mkU32(guest_EIP_bbstart
+ delta
) ) );
8179 dres
.whatNext
= Dis_StopHere
;
8180 dres
.jk_StopHere
= Ijk_InvalICache
;
8181 goto decode_success
;
8183 /* We don't know what it is. */
8184 goto decode_failure
;
8189 /* Handle a couple of weird-ass NOPs that have been observed in the
8192 const UChar
* code
= guest_code
+ delta
;
8193 /* Sun's JVM 1.5.0 uses the following as a NOP:
8194 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */
8195 if (code
[0] == 0x26 && code
[1] == 0x2E && code
[2] == 0x64
8196 && code
[3] == 0x65 && code
[4] == 0x90) {
8197 DIP("%%es:%%cs:%%fs:%%gs:nop\n");
8199 goto decode_success
;
8201 /* Don't barf on recent binutils padding,
8202 all variants of which are: nopw %cs:0x0(%eax,%eax,1)
8203 66 2e 0f 1f 84 00 00 00 00 00
8204 66 66 2e 0f 1f 84 00 00 00 00 00
8205 66 66 66 2e 0f 1f 84 00 00 00 00 00
8206 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8207 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8208 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
8210 if (code
[0] == 0x66) {
8212 for (data16_cnt
= 1; data16_cnt
< 6; data16_cnt
++)
8213 if (code
[data16_cnt
] != 0x66)
8215 if (code
[data16_cnt
] == 0x2E && code
[data16_cnt
+ 1] == 0x0F
8216 && code
[data16_cnt
+ 2] == 0x1F && code
[data16_cnt
+ 3] == 0x84
8217 && code
[data16_cnt
+ 4] == 0x00 && code
[data16_cnt
+ 5] == 0x00
8218 && code
[data16_cnt
+ 6] == 0x00 && code
[data16_cnt
+ 7] == 0x00
8219 && code
[data16_cnt
+ 8] == 0x00 ) {
8220 DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n");
8221 delta
+= 9 + data16_cnt
;
8222 goto decode_success
;
8226 // Intel CET requires the following opcodes to be treated as NOPs
8227 // with any prefix and ModRM, SIB and disp combination:
8228 // "0F 19", "0F 1C", "0F 1D", "0F 1E", "0F 1F"
8229 UInt opcode_index
= 0;
8230 // Skip any prefix combination
8231 UInt addr_override
= 0;
8233 Bool is_prefix
= True
;
8235 switch (code
[opcode_index
]) {
8244 case 0x26: case 0x3E: // if we set segment override here,
8245 case 0x64: case 0x65: // disAMode segfaults
8246 case 0x2E: case 0x36:
8247 case 0xF0: case 0xF2: case 0xF3:
8255 if (code
[opcode_index
] == 0x0F) {
8256 switch (code
[opcode_index
+1]) {
8258 case 0x1C: case 0x1D:
8259 case 0x1E: case 0x1F:
8260 delta
+= opcode_index
+2;
8261 modrm
= getUChar(delta
);
8262 if (epartIsReg(modrm
)) {
8264 DIP("nop%c\n", nameISize(temp_sz
));
8267 addr
= disAMode(&alen
, 0/*"no sorb"*/, delta
, dis_buf
);
8268 delta
+= alen
- addr_override
;
8269 DIP("nop%c %s\n", nameISize(temp_sz
), dis_buf
);
8271 goto decode_success
;
8277 /* Normal instruction handling starts here. */
8279 /* Deal with some but not all prefixes:
8282 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:)
8283 Not dealt with (left in place):
8288 if (n_prefixes
> 7) goto decode_failure
;
8289 pre
= getUChar(delta
);
8298 case 0x3E: /* %DS: */
8299 case 0x26: /* %ES: */
8300 case 0x64: /* %FS: */
8301 case 0x65: /* %GS: */
8302 case 0x36: /* %SS: */
8304 goto decode_failure
; /* only one seg override allowed */
8307 case 0x2E: { /* %CS: */
8308 /* 2E prefix on a conditional branch instruction is a
8309 branch-prediction hint, which can safely be ignored. */
8310 UChar op1
= getIByte(delta
+1);
8311 UChar op2
= getIByte(delta
+2);
8312 if ((op1
>= 0x70 && op1
<= 0x7F)
8314 || (op1
== 0x0F && op2
>= 0x80 && op2
<= 0x8F)) {
8315 if (0) vex_printf("vex x86->IR: ignoring branch hint\n");
8317 /* All other CS override cases are not handled */
8318 goto decode_failure
;
8331 /* Now we should be looking at the primary opcode byte or the
8332 leading F2 or F3. Check that any LOCK prefix is actually
8336 if (can_be_used_with_LOCK_prefix( &guest_code
[delta
] )) {
8339 *expect_CAS
= False
;
8340 goto decode_failure
;
8345 /* ---------------------------------------------------- */
8346 /* --- The SSE decoder. --- */
8347 /* ---------------------------------------------------- */
8349 /* What did I do to deserve SSE ? Perhaps I was really bad in a
8352 /* Note, this doesn't handle SSE2 or SSE3. That is handled in a
8353 later section, further on. */
8355 insn
= &guest_code
[delta
];
8357 /* Treat fxsave specially. It should be doable even on an SSE0
8358 (Pentium-II class) CPU. Hence be prepared to handle it on
8359 any subarchitecture variant.
8362 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
8363 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xAE
8364 && !epartIsReg(insn
[2]) && gregOfRM(insn
[2]) == 0) {
8366 modrm
= getIByte(delta
+2);
8368 vassert(!epartIsReg(modrm
));
8370 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8372 gen_SEGV_if_not_16_aligned(addr
);
8374 DIP("fxsave %s\n", dis_buf
);
8376 /* Uses dirty helper:
8377 void x86g_do_FXSAVE ( VexGuestX86State*, UInt ) */
8378 d
= unsafeIRDirty_0_N (
8380 "x86g_dirtyhelper_FXSAVE",
8381 &x86g_dirtyhelper_FXSAVE
,
8382 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
8385 /* declare we're writing memory */
8387 d
->mAddr
= mkexpr(addr
);
8388 d
->mSize
= 464; /* according to recent Intel docs */
8390 /* declare we're reading guest state */
8392 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
8394 d
->fxState
[0].fx
= Ifx_Read
;
8395 d
->fxState
[0].offset
= OFFB_FTOP
;
8396 d
->fxState
[0].size
= sizeof(UInt
);
8398 d
->fxState
[1].fx
= Ifx_Read
;
8399 d
->fxState
[1].offset
= OFFB_FPREGS
;
8400 d
->fxState
[1].size
= 8 * sizeof(ULong
);
8402 d
->fxState
[2].fx
= Ifx_Read
;
8403 d
->fxState
[2].offset
= OFFB_FPTAGS
;
8404 d
->fxState
[2].size
= 8 * sizeof(UChar
);
8406 d
->fxState
[3].fx
= Ifx_Read
;
8407 d
->fxState
[3].offset
= OFFB_FPROUND
;
8408 d
->fxState
[3].size
= sizeof(UInt
);
8410 d
->fxState
[4].fx
= Ifx_Read
;
8411 d
->fxState
[4].offset
= OFFB_FC3210
;
8412 d
->fxState
[4].size
= sizeof(UInt
);
8414 d
->fxState
[5].fx
= Ifx_Read
;
8415 d
->fxState
[5].offset
= OFFB_XMM0
;
8416 d
->fxState
[5].size
= 8 * sizeof(U128
);
8418 d
->fxState
[6].fx
= Ifx_Read
;
8419 d
->fxState
[6].offset
= OFFB_SSEROUND
;
8420 d
->fxState
[6].size
= sizeof(UInt
);
8422 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8423 images are packed back-to-back. If not, the value of
8424 d->fxState[5].size is wrong. */
8425 vassert(16 == sizeof(U128
));
8426 vassert(OFFB_XMM7
== (OFFB_XMM0
+ 7 * 16));
8428 stmt( IRStmt_Dirty(d
) );
8430 goto decode_success
;
8433 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
8434 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xAE
8435 && !epartIsReg(insn
[2]) && gregOfRM(insn
[2]) == 1) {
8437 modrm
= getIByte(delta
+2);
8439 vassert(!epartIsReg(modrm
));
8441 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8443 gen_SEGV_if_not_16_aligned(addr
);
8445 DIP("fxrstor %s\n", dis_buf
);
8447 /* Uses dirty helper:
8448 VexEmNote x86g_do_FXRSTOR ( VexGuestX86State*, UInt )
8450 the VexEmNote value is simply ignored (unlike for FRSTOR)
8452 d
= unsafeIRDirty_0_N (
8454 "x86g_dirtyhelper_FXRSTOR",
8455 &x86g_dirtyhelper_FXRSTOR
,
8456 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
8459 /* declare we're reading memory */
8461 d
->mAddr
= mkexpr(addr
);
8462 d
->mSize
= 464; /* according to recent Intel docs */
8464 /* declare we're writing guest state */
8466 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
8468 d
->fxState
[0].fx
= Ifx_Write
;
8469 d
->fxState
[0].offset
= OFFB_FTOP
;
8470 d
->fxState
[0].size
= sizeof(UInt
);
8472 d
->fxState
[1].fx
= Ifx_Write
;
8473 d
->fxState
[1].offset
= OFFB_FPREGS
;
8474 d
->fxState
[1].size
= 8 * sizeof(ULong
);
8476 d
->fxState
[2].fx
= Ifx_Write
;
8477 d
->fxState
[2].offset
= OFFB_FPTAGS
;
8478 d
->fxState
[2].size
= 8 * sizeof(UChar
);
8480 d
->fxState
[3].fx
= Ifx_Write
;
8481 d
->fxState
[3].offset
= OFFB_FPROUND
;
8482 d
->fxState
[3].size
= sizeof(UInt
);
8484 d
->fxState
[4].fx
= Ifx_Write
;
8485 d
->fxState
[4].offset
= OFFB_FC3210
;
8486 d
->fxState
[4].size
= sizeof(UInt
);
8488 d
->fxState
[5].fx
= Ifx_Write
;
8489 d
->fxState
[5].offset
= OFFB_XMM0
;
8490 d
->fxState
[5].size
= 8 * sizeof(U128
);
8492 d
->fxState
[6].fx
= Ifx_Write
;
8493 d
->fxState
[6].offset
= OFFB_SSEROUND
;
8494 d
->fxState
[6].size
= sizeof(UInt
);
8496 /* Be paranoid ... this assertion tries to ensure the 8 %xmm
8497 images are packed back-to-back. If not, the value of
8498 d->fxState[5].size is wrong. */
8499 vassert(16 == sizeof(U128
));
8500 vassert(OFFB_XMM7
== (OFFB_XMM0
+ 7 * 16));
8502 stmt( IRStmt_Dirty(d
) );
8504 goto decode_success
;
8507 /* ------ SSE decoder main ------ */
8509 /* Skip parts of the decoder which don't apply given the stated
8510 guest subarchitecture. */
8511 if (archinfo
->hwcaps
== 0/*baseline, no sse at all*/)
8512 goto after_sse_decoders
;
8514 /* With mmxext only some extended MMX instructions are recognized.
8515 The mmxext instructions are MASKMOVQ MOVNTQ PAVGB PAVGW PMAXSW
8516 PMAXUB PMINSW PMINUB PMULHUW PSADBW PSHUFW PEXTRW PINSRW PMOVMSKB
8517 PREFETCHNTA PREFETCHT0 PREFETCHT1 PREFETCHT2 SFENCE
8519 http://support.amd.com/us/Embedded_TechDocs/22466.pdf
8520 https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions */
8522 if (archinfo
->hwcaps
== VEX_HWCAPS_X86_MMXEXT
/*integer only sse1 subset*/)
8525 /* Otherwise we must be doing sse1 or sse2, so we can at least try
8528 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
8529 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x58) {
8530 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "addps", Iop_Add32Fx4
);
8531 goto decode_success
;
8534 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
8535 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x58) {
8537 delta
= dis_SSE_E_to_G_lo32( sorb
, delta
+3, "addss", Iop_Add32F0x4
);
8538 goto decode_success
;
8541 /* 0F 55 = ANDNPS -- G = (not G) and E */
8542 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x55) {
8543 delta
= dis_SSE_E_to_G_all_invG( sorb
, delta
+2, "andnps", Iop_AndV128
);
8544 goto decode_success
;
8547 /* 0F 54 = ANDPS -- G = G and E */
8548 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x54) {
8549 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "andps", Iop_AndV128
);
8550 goto decode_success
;
8553 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
8554 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xC2) {
8555 delta
= dis_SSEcmp_E_to_G( sorb
, delta
+2, "cmpps", True
, 4 );
8556 goto decode_success
;
8559 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
8560 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0xC2) {
8562 delta
= dis_SSEcmp_E_to_G( sorb
, delta
+3, "cmpss", False
, 4 );
8563 goto decode_success
;
8566 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
8567 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
8568 if (sz
== 4 && insn
[0] == 0x0F && (insn
[1] == 0x2F || insn
[1] == 0x2E)) {
8569 IRTemp argL
= newTemp(Ity_F32
);
8570 IRTemp argR
= newTemp(Ity_F32
);
8571 modrm
= getIByte(delta
+2);
8572 if (epartIsReg(modrm
)) {
8573 assign( argR
, getXMMRegLane32F( eregOfRM(modrm
), 0/*lowest lane*/ ) );
8575 DIP("[u]comiss %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
8576 nameXMMReg(gregOfRM(modrm
)) );
8578 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8579 assign( argR
, loadLE(Ity_F32
, mkexpr(addr
)) );
8581 DIP("[u]comiss %s,%s\n", dis_buf
,
8582 nameXMMReg(gregOfRM(modrm
)) );
8584 assign( argL
, getXMMRegLane32F( gregOfRM(modrm
), 0/*lowest lane*/ ) );
8586 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
8587 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
8592 unop(Iop_F32toF64
,mkexpr(argL
)),
8593 unop(Iop_F32toF64
,mkexpr(argR
))),
8596 /* Set NDEP even though it isn't used. This makes redundant-PUT
8597 elimination of previous stores to this field work better. */
8598 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
8599 goto decode_success
;
8602 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
8604 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x2A) {
8605 IRTemp arg64
= newTemp(Ity_I64
);
8606 IRTemp rmode
= newTemp(Ity_I32
);
8609 modrm
= getIByte(delta
+2);
8610 if (epartIsReg(modrm
)) {
8611 /* Only switch to MMX mode if the source is a MMX register.
8612 See comments on CVTPI2PD for details. Fixes #357059. */
8614 assign( arg64
, getMMXReg(eregOfRM(modrm
)) );
8616 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
8617 nameXMMReg(gregOfRM(modrm
)));
8619 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8620 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
8622 DIP("cvtpi2ps %s,%s\n", dis_buf
,
8623 nameXMMReg(gregOfRM(modrm
)) );
8626 assign( rmode
, get_sse_roundingmode() );
8633 unop(Iop_64to32
, mkexpr(arg64
)) )) );
8640 unop(Iop_64HIto32
, mkexpr(arg64
)) )) );
8642 goto decode_success
;
8645 /* F3 0F 2A = CVTSI2SS -- convert I32 in mem/ireg to F32 in low
8647 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x2A) {
8648 IRTemp arg32
= newTemp(Ity_I32
);
8649 IRTemp rmode
= newTemp(Ity_I32
);
8652 modrm
= getIByte(delta
+3);
8653 if (epartIsReg(modrm
)) {
8654 assign( arg32
, getIReg(4, eregOfRM(modrm
)) );
8656 DIP("cvtsi2ss %s,%s\n", nameIReg(4, eregOfRM(modrm
)),
8657 nameXMMReg(gregOfRM(modrm
)));
8659 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
8660 assign( arg32
, loadLE(Ity_I32
, mkexpr(addr
)) );
8662 DIP("cvtsi2ss %s,%s\n", dis_buf
,
8663 nameXMMReg(gregOfRM(modrm
)) );
8666 assign( rmode
, get_sse_roundingmode() );
8672 unop(Iop_I32StoF64
, mkexpr(arg32
)) ) );
8674 goto decode_success
;
8677 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
8678 I32 in mmx, according to prevailing SSE rounding mode */
8679 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
8680 I32 in mmx, rounding towards zero */
8681 if (sz
== 4 && insn
[0] == 0x0F && (insn
[1] == 0x2D || insn
[1] == 0x2C)) {
8682 IRTemp dst64
= newTemp(Ity_I64
);
8683 IRTemp rmode
= newTemp(Ity_I32
);
8684 IRTemp f32lo
= newTemp(Ity_F32
);
8685 IRTemp f32hi
= newTemp(Ity_F32
);
8686 Bool r2zero
= toBool(insn
[1] == 0x2C);
8689 modrm
= getIByte(delta
+2);
8691 if (epartIsReg(modrm
)) {
8693 assign(f32lo
, getXMMRegLane32F(eregOfRM(modrm
), 0));
8694 assign(f32hi
, getXMMRegLane32F(eregOfRM(modrm
), 1));
8695 DIP("cvt%sps2pi %s,%s\n", r2zero
? "t" : "",
8696 nameXMMReg(eregOfRM(modrm
)),
8697 nameMMXReg(gregOfRM(modrm
)));
8699 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8700 assign(f32lo
, loadLE(Ity_F32
, mkexpr(addr
)));
8701 assign(f32hi
, loadLE(Ity_F32
, binop( Iop_Add32
,
8705 DIP("cvt%sps2pi %s,%s\n", r2zero
? "t" : "",
8707 nameMMXReg(gregOfRM(modrm
)));
8711 assign(rmode
, mkU32((UInt
)Irrm_ZERO
) );
8713 assign( rmode
, get_sse_roundingmode() );
8718 binop( Iop_32HLto64
,
8719 binop( Iop_F64toI32S
,
8721 unop( Iop_F32toF64
, mkexpr(f32hi
) ) ),
8722 binop( Iop_F64toI32S
,
8724 unop( Iop_F32toF64
, mkexpr(f32lo
) ) )
8728 putMMXReg(gregOfRM(modrm
), mkexpr(dst64
));
8729 goto decode_success
;
8732 /* F3 0F 2D = CVTSS2SI -- convert F32 in mem/low quarter xmm to
8733 I32 in ireg, according to prevailing SSE rounding mode */
8734 /* F3 0F 2C = CVTTSS2SI -- convert F32 in mem/low quarter xmm to
8735 I32 in ireg, rounding towards zero */
8736 if (insn
[0] == 0xF3 && insn
[1] == 0x0F
8737 && (insn
[2] == 0x2D || insn
[2] == 0x2C)) {
8738 IRTemp rmode
= newTemp(Ity_I32
);
8739 IRTemp f32lo
= newTemp(Ity_F32
);
8740 Bool r2zero
= toBool(insn
[2] == 0x2C);
8743 modrm
= getIByte(delta
+3);
8744 if (epartIsReg(modrm
)) {
8746 assign(f32lo
, getXMMRegLane32F(eregOfRM(modrm
), 0));
8747 DIP("cvt%sss2si %s,%s\n", r2zero
? "t" : "",
8748 nameXMMReg(eregOfRM(modrm
)),
8749 nameIReg(4, gregOfRM(modrm
)));
8751 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
8752 assign(f32lo
, loadLE(Ity_F32
, mkexpr(addr
)));
8754 DIP("cvt%sss2si %s,%s\n", r2zero
? "t" : "",
8756 nameIReg(4, gregOfRM(modrm
)));
8760 assign( rmode
, mkU32((UInt
)Irrm_ZERO
) );
8762 assign( rmode
, get_sse_roundingmode() );
8765 putIReg(4, gregOfRM(modrm
),
8766 binop( Iop_F64toI32S
,
8768 unop( Iop_F32toF64
, mkexpr(f32lo
) ) )
8771 goto decode_success
;
8774 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
8775 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x5E) {
8776 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "divps", Iop_Div32Fx4
);
8777 goto decode_success
;
8780 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
8781 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x5E) {
8783 delta
= dis_SSE_E_to_G_lo32( sorb
, delta
+3, "divss", Iop_Div32F0x4
);
8784 goto decode_success
;
8787 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
8788 if (insn
[0] == 0x0F && insn
[1] == 0xAE
8789 && !epartIsReg(insn
[2]) && gregOfRM(insn
[2]) == 2) {
8791 IRTemp t64
= newTemp(Ity_I64
);
8792 IRTemp ew
= newTemp(Ity_I32
);
8794 modrm
= getIByte(delta
+2);
8795 vassert(!epartIsReg(modrm
));
8798 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8800 DIP("ldmxcsr %s\n", dis_buf
);
8802 /* The only thing we observe in %mxcsr is the rounding mode.
8803 Therefore, pass the 32-bit value (SSE native-format control
8804 word) to a clean helper, getting back a 64-bit value, the
8805 lower half of which is the SSEROUND value to store, and the
8806 upper half of which is the emulation-warning token which may
8809 /* ULong x86h_check_ldmxcsr ( UInt ); */
8810 assign( t64
, mkIRExprCCall(
8811 Ity_I64
, 0/*regparms*/,
8812 "x86g_check_ldmxcsr",
8813 &x86g_check_ldmxcsr
,
8814 mkIRExprVec_1( loadLE(Ity_I32
, mkexpr(addr
)) )
8818 put_sse_roundingmode( unop(Iop_64to32
, mkexpr(t64
)) );
8819 assign( ew
, unop(Iop_64HIto32
, mkexpr(t64
) ) );
8820 put_emwarn( mkexpr(ew
) );
8821 /* Finally, if an emulation warning was reported, side-exit to
8822 the next insn, reporting the warning, so that Valgrind's
8823 dispatcher sees the warning. */
8826 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
8828 IRConst_U32( ((Addr32
)guest_EIP_bbstart
)+delta
),
8832 goto decode_success
;
8836 /* mmxext sse1 subset starts here. mmxext only arches will parse
8837 only this subset of the sse1 instructions. */
8840 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8841 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
8842 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xF7) {
8844 delta
= dis_MMX( &ok
, sorb
, sz
, delta
+1 );
8846 goto decode_failure
;
8847 goto decode_success
;
8850 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8851 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
8852 Intel manual does not say anything about the usual business of
8853 the FP reg tags getting trashed whenever an MMX insn happens.
8854 So we just leave them alone.
8856 if (insn
[0] == 0x0F && insn
[1] == 0xE7) {
8857 modrm
= getIByte(delta
+2);
8858 if (sz
== 4 && !epartIsReg(modrm
)) {
8859 /* do_MMX_preamble(); Intel docs don't specify this */
8860 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8861 storeLE( mkexpr(addr
), getMMXReg(gregOfRM(modrm
)) );
8862 DIP("movntq %s,%s\n", dis_buf
,
8863 nameMMXReg(gregOfRM(modrm
)));
8865 goto decode_success
;
8867 /* else fall through */
8870 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8871 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
8872 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xE0) {
8874 delta
= dis_MMXop_regmem_to_reg (
8875 sorb
, delta
+2, insn
[1], "pavgb", False
);
8876 goto decode_success
;
8879 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8880 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
8881 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xE3) {
8883 delta
= dis_MMXop_regmem_to_reg (
8884 sorb
, delta
+2, insn
[1], "pavgw", False
);
8885 goto decode_success
;
8888 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8889 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
8890 zero-extend of it in ireg(G). */
8891 if (insn
[0] == 0x0F && insn
[1] == 0xC5) {
8893 if (sz
== 4 && epartIsReg(modrm
)) {
8894 IRTemp sV
= newTemp(Ity_I64
);
8895 t5
= newTemp(Ity_I16
);
8897 assign(sV
, getMMXReg(eregOfRM(modrm
)));
8898 breakup64to16s( sV
, &t3
, &t2
, &t1
, &t0
);
8899 switch (insn
[3] & 3) {
8900 case 0: assign(t5
, mkexpr(t0
)); break;
8901 case 1: assign(t5
, mkexpr(t1
)); break;
8902 case 2: assign(t5
, mkexpr(t2
)); break;
8903 case 3: assign(t5
, mkexpr(t3
)); break;
8904 default: vassert(0); /*NOTREACHED*/
8906 putIReg(4, gregOfRM(modrm
), unop(Iop_16Uto32
, mkexpr(t5
)));
8907 DIP("pextrw $%d,%s,%s\n",
8908 (Int
)insn
[3], nameMMXReg(eregOfRM(modrm
)),
8909 nameIReg(4,gregOfRM(modrm
)));
8911 goto decode_success
;
8913 /* else fall through */
8916 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8917 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
8918 put it into the specified lane of mmx(G). */
8919 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xC4) {
8920 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
8921 mmx reg. t4 is the new lane value. t5 is the original
8922 mmx value. t6 is the new mmx value. */
8924 t4
= newTemp(Ity_I16
);
8925 t5
= newTemp(Ity_I64
);
8926 t6
= newTemp(Ity_I64
);
8930 assign(t5
, getMMXReg(gregOfRM(modrm
)));
8931 breakup64to16s( t5
, &t3
, &t2
, &t1
, &t0
);
8933 if (epartIsReg(modrm
)) {
8934 assign(t4
, getIReg(2, eregOfRM(modrm
)));
8937 DIP("pinsrw $%d,%s,%s\n", lane
,
8938 nameIReg(2,eregOfRM(modrm
)),
8939 nameMMXReg(gregOfRM(modrm
)));
8941 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
8943 lane
= insn
[3+alen
-1];
8944 assign(t4
, loadLE(Ity_I16
, mkexpr(addr
)));
8945 DIP("pinsrw $%d,%s,%s\n", lane
,
8947 nameMMXReg(gregOfRM(modrm
)));
8951 case 0: assign(t6
, mk64from16s(t3
,t2
,t1
,t4
)); break;
8952 case 1: assign(t6
, mk64from16s(t3
,t2
,t4
,t0
)); break;
8953 case 2: assign(t6
, mk64from16s(t3
,t4
,t1
,t0
)); break;
8954 case 3: assign(t6
, mk64from16s(t4
,t2
,t1
,t0
)); break;
8955 default: vassert(0); /*NOTREACHED*/
8957 putMMXReg(gregOfRM(modrm
), mkexpr(t6
));
8958 goto decode_success
;
8961 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8962 /* 0F EE = PMAXSW -- 16x4 signed max */
8963 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xEE) {
8965 delta
= dis_MMXop_regmem_to_reg (
8966 sorb
, delta
+2, insn
[1], "pmaxsw", False
);
8967 goto decode_success
;
8970 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8971 /* 0F DE = PMAXUB -- 8x8 unsigned max */
8972 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xDE) {
8974 delta
= dis_MMXop_regmem_to_reg (
8975 sorb
, delta
+2, insn
[1], "pmaxub", False
);
8976 goto decode_success
;
8979 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8980 /* 0F EA = PMINSW -- 16x4 signed min */
8981 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xEA) {
8983 delta
= dis_MMXop_regmem_to_reg (
8984 sorb
, delta
+2, insn
[1], "pminsw", False
);
8985 goto decode_success
;
8988 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8989 /* 0F DA = PMINUB -- 8x8 unsigned min */
8990 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xDA) {
8992 delta
= dis_MMXop_regmem_to_reg (
8993 sorb
, delta
+2, insn
[1], "pminub", False
);
8994 goto decode_success
;
8997 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
8998 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
8999 mmx(E), turn them into a byte, and put zero-extend of it in
9001 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xD7) {
9003 if (epartIsReg(modrm
)) {
9005 t0
= newTemp(Ity_I64
);
9006 t1
= newTemp(Ity_I32
);
9007 assign(t0
, getMMXReg(eregOfRM(modrm
)));
9008 assign(t1
, unop(Iop_8Uto32
, unop(Iop_GetMSBs8x8
, mkexpr(t0
))));
9009 putIReg(4, gregOfRM(modrm
), mkexpr(t1
));
9010 DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
9011 nameIReg(4,gregOfRM(modrm
)));
9013 goto decode_success
;
9015 /* else fall through */
9018 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9019 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */
9020 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xE4) {
9022 delta
= dis_MMXop_regmem_to_reg (
9023 sorb
, delta
+2, insn
[1], "pmuluh", False
);
9024 goto decode_success
;
9027 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
9028 /* 0F 18 /1 = PREFETCHT0 -- with various different hints */
9029 /* 0F 18 /2 = PREFETCHT1 */
9030 /* 0F 18 /3 = PREFETCHT2 */
9031 if (insn
[0] == 0x0F && insn
[1] == 0x18
9032 && !epartIsReg(insn
[2])
9033 && gregOfRM(insn
[2]) >= 0 && gregOfRM(insn
[2]) <= 3) {
9034 const HChar
* hintstr
= "??";
9036 modrm
= getIByte(delta
+2);
9037 vassert(!epartIsReg(modrm
));
9039 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9042 switch (gregOfRM(modrm
)) {
9043 case 0: hintstr
= "nta"; break;
9044 case 1: hintstr
= "t0"; break;
9045 case 2: hintstr
= "t1"; break;
9046 case 3: hintstr
= "t2"; break;
9047 default: vassert(0); /*NOTREACHED*/
9050 DIP("prefetch%s %s\n", hintstr
, dis_buf
);
9051 goto decode_success
;
9054 /* 0F 0D /0 = PREFETCH m8 -- 3DNow! prefetch */
9055 /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */
9056 if (insn
[0] == 0x0F && insn
[1] == 0x0D
9057 && !epartIsReg(insn
[2])
9058 && gregOfRM(insn
[2]) >= 0 && gregOfRM(insn
[2]) <= 1) {
9059 const HChar
* hintstr
= "??";
9061 modrm
= getIByte(delta
+2);
9062 vassert(!epartIsReg(modrm
));
9064 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9067 switch (gregOfRM(modrm
)) {
9068 case 0: hintstr
= ""; break;
9069 case 1: hintstr
= "w"; break;
9070 default: vassert(0); /*NOTREACHED*/
9073 DIP("prefetch%s %s\n", hintstr
, dis_buf
);
9074 goto decode_success
;
9077 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9078 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
9079 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xF6) {
9081 delta
= dis_MMXop_regmem_to_reg (
9082 sorb
, delta
+2, insn
[1], "psadbw", False
);
9083 goto decode_success
;
9086 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
9087 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
9088 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x70) {
9090 IRTemp sV
, dV
, s3
, s2
, s1
, s0
;
9091 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
9092 sV
= newTemp(Ity_I64
);
9093 dV
= newTemp(Ity_I64
);
9096 if (epartIsReg(modrm
)) {
9097 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
9098 order
= (Int
)insn
[3];
9100 DIP("pshufw $%d,%s,%s\n", order
,
9101 nameMMXReg(eregOfRM(modrm
)),
9102 nameMMXReg(gregOfRM(modrm
)));
9104 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9105 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
9106 order
= (Int
)insn
[2+alen
];
9108 DIP("pshufw $%d,%s,%s\n", order
,
9110 nameMMXReg(gregOfRM(modrm
)));
9112 breakup64to16s( sV
, &s3
, &s2
, &s1
, &s0
);
9115 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9117 mk64from16s( SEL((order
>>6)&3), SEL((order
>>4)&3),
9118 SEL((order
>>2)&3), SEL((order
>>0)&3) )
9120 putMMXReg(gregOfRM(modrm
), mkexpr(dV
));
9122 goto decode_success
;
9125 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
9126 if (insn
[0] == 0x0F && insn
[1] == 0xAE
9127 && epartIsReg(insn
[2]) && gregOfRM(insn
[2]) == 7) {
9130 /* Insert a memory fence. It's sometimes important that these
9131 are carried through to the generated code. */
9132 stmt( IRStmt_MBE(Imbe_Fence
) );
9134 goto decode_success
;
9137 /* End of mmxext sse1 subset. No more sse parsing for mmxext only arches. */
9138 if (archinfo
->hwcaps
== VEX_HWCAPS_X86_MMXEXT
/*integer only sse1 subset*/)
9139 goto after_sse_decoders
;
9142 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
9143 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x5F) {
9144 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "maxps", Iop_Max32Fx4
);
9145 goto decode_success
;
9148 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
9149 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x5F) {
9151 delta
= dis_SSE_E_to_G_lo32( sorb
, delta
+3, "maxss", Iop_Max32F0x4
);
9152 goto decode_success
;
9155 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
9156 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x5D) {
9157 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "minps", Iop_Min32Fx4
);
9158 goto decode_success
;
9161 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
9162 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x5D) {
9164 delta
= dis_SSE_E_to_G_lo32( sorb
, delta
+3, "minss", Iop_Min32F0x4
);
9165 goto decode_success
;
9168 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
9169 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
9170 if (sz
== 4 && insn
[0] == 0x0F && (insn
[1] == 0x28 || insn
[1] == 0x10)) {
9171 modrm
= getIByte(delta
+2);
9172 if (epartIsReg(modrm
)) {
9173 putXMMReg( gregOfRM(modrm
),
9174 getXMMReg( eregOfRM(modrm
) ));
9175 DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9176 nameXMMReg(gregOfRM(modrm
)));
9179 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9180 if (insn
[1] == 0x28/*movaps*/)
9181 gen_SEGV_if_not_16_aligned( addr
);
9182 putXMMReg( gregOfRM(modrm
),
9183 loadLE(Ity_V128
, mkexpr(addr
)) );
9184 DIP("mov[ua]ps %s,%s\n", dis_buf
,
9185 nameXMMReg(gregOfRM(modrm
)));
9188 goto decode_success
;
9191 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
9192 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
9193 if (sz
== 4 && insn
[0] == 0x0F
9194 && (insn
[1] == 0x29 || insn
[1] == 0x11)) {
9195 modrm
= getIByte(delta
+2);
9196 if (epartIsReg(modrm
)) {
9197 /* fall through; awaiting test case */
9199 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9200 if (insn
[1] == 0x29/*movaps*/)
9201 gen_SEGV_if_not_16_aligned( addr
);
9202 storeLE( mkexpr(addr
), getXMMReg(gregOfRM(modrm
)) );
9203 DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm
)),
9206 goto decode_success
;
9210 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
9211 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
9212 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x16) {
9213 modrm
= getIByte(delta
+2);
9214 if (epartIsReg(modrm
)) {
9216 putXMMRegLane64( gregOfRM(modrm
), 1/*upper lane*/,
9217 getXMMRegLane64( eregOfRM(modrm
), 0 ) );
9218 DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9219 nameXMMReg(gregOfRM(modrm
)));
9221 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9223 putXMMRegLane64( gregOfRM(modrm
), 1/*upper lane*/,
9224 loadLE(Ity_I64
, mkexpr(addr
)) );
9225 DIP("movhps %s,%s\n", dis_buf
,
9226 nameXMMReg( gregOfRM(modrm
) ));
9228 goto decode_success
;
9231 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
9232 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x17) {
9233 if (!epartIsReg(insn
[2])) {
9235 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
9237 storeLE( mkexpr(addr
),
9238 getXMMRegLane64( gregOfRM(insn
[2]),
9239 1/*upper lane*/ ) );
9240 DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn
[2]) ),
9242 goto decode_success
;
9244 /* else fall through */
9247 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
9248 /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
9249 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x12) {
9250 modrm
= getIByte(delta
+2);
9251 if (epartIsReg(modrm
)) {
9253 putXMMRegLane64( gregOfRM(modrm
),
9255 getXMMRegLane64( eregOfRM(modrm
), 1 ));
9256 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm
)),
9257 nameXMMReg(gregOfRM(modrm
)));
9259 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9261 putXMMRegLane64( gregOfRM(modrm
), 0/*lower lane*/,
9262 loadLE(Ity_I64
, mkexpr(addr
)) );
9263 DIP("movlps %s, %s\n",
9264 dis_buf
, nameXMMReg( gregOfRM(modrm
) ));
9266 goto decode_success
;
9269 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
9270 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x13) {
9271 if (!epartIsReg(insn
[2])) {
9273 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
9275 storeLE( mkexpr(addr
),
9276 getXMMRegLane64( gregOfRM(insn
[2]),
9277 0/*lower lane*/ ) );
9278 DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn
[2]) ),
9280 goto decode_success
;
9282 /* else fall through */
9285 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
9286 to 4 lowest bits of ireg(G) */
9287 if (insn
[0] == 0x0F && insn
[1] == 0x50) {
9288 modrm
= getIByte(delta
+2);
9289 if (sz
== 4 && epartIsReg(modrm
)) {
9291 t0
= newTemp(Ity_I32
);
9292 t1
= newTemp(Ity_I32
);
9293 t2
= newTemp(Ity_I32
);
9294 t3
= newTemp(Ity_I32
);
9296 src
= eregOfRM(modrm
);
9297 assign( t0
, binop( Iop_And32
,
9298 binop(Iop_Shr32
, getXMMRegLane32(src
,0), mkU8(31)),
9300 assign( t1
, binop( Iop_And32
,
9301 binop(Iop_Shr32
, getXMMRegLane32(src
,1), mkU8(30)),
9303 assign( t2
, binop( Iop_And32
,
9304 binop(Iop_Shr32
, getXMMRegLane32(src
,2), mkU8(29)),
9306 assign( t3
, binop( Iop_And32
,
9307 binop(Iop_Shr32
, getXMMRegLane32(src
,3), mkU8(28)),
9309 putIReg(4, gregOfRM(modrm
),
9311 binop(Iop_Or32
, mkexpr(t0
), mkexpr(t1
)),
9312 binop(Iop_Or32
, mkexpr(t2
), mkexpr(t3
))
9315 DIP("movmskps %s,%s\n", nameXMMReg(src
),
9316 nameIReg(4, gregOfRM(modrm
)));
9317 goto decode_success
;
9319 /* else fall through */
9322 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
9323 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
9324 if (insn
[0] == 0x0F && insn
[1] == 0x2B) {
9325 modrm
= getIByte(delta
+2);
9326 if (!epartIsReg(modrm
)) {
9327 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9328 gen_SEGV_if_not_16_aligned( addr
);
9329 storeLE( mkexpr(addr
), getXMMReg(gregOfRM(modrm
)) );
9330 DIP("movntp%s %s,%s\n", sz
==2 ? "d" : "s",
9332 nameXMMReg(gregOfRM(modrm
)));
9334 goto decode_success
;
9336 /* else fall through */
9339 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
9340 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
9341 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x10) {
9343 modrm
= getIByte(delta
+3);
9344 if (epartIsReg(modrm
)) {
9345 putXMMRegLane32( gregOfRM(modrm
), 0,
9346 getXMMRegLane32( eregOfRM(modrm
), 0 ));
9347 DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9348 nameXMMReg(gregOfRM(modrm
)));
9351 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
9352 /* zero bits 127:64 */
9353 putXMMRegLane64( gregOfRM(modrm
), 1, mkU64(0) );
9354 /* zero bits 63:32 */
9355 putXMMRegLane32( gregOfRM(modrm
), 1, mkU32(0) );
9356 /* write bits 31:0 */
9357 putXMMRegLane32( gregOfRM(modrm
), 0,
9358 loadLE(Ity_I32
, mkexpr(addr
)) );
9359 DIP("movss %s,%s\n", dis_buf
,
9360 nameXMMReg(gregOfRM(modrm
)));
9363 goto decode_success
;
9366 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
9368 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x11) {
9370 modrm
= getIByte(delta
+3);
9371 if (epartIsReg(modrm
)) {
9372 /* fall through, we don't yet have a test case */
9374 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
9375 storeLE( mkexpr(addr
),
9376 getXMMRegLane32(gregOfRM(modrm
), 0) );
9377 DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm
)),
9380 goto decode_success
;
9384 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
9385 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x59) {
9386 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "mulps", Iop_Mul32Fx4
);
9387 goto decode_success
;
9390 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
9391 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x59) {
9393 delta
= dis_SSE_E_to_G_lo32( sorb
, delta
+3, "mulss", Iop_Mul32F0x4
);
9394 goto decode_success
;
9397 /* 0F 56 = ORPS -- G = G or E */
9398 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x56) {
9399 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "orps", Iop_OrV128
);
9400 goto decode_success
;
9403 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
9404 if (insn
[0] == 0x0F && insn
[1] == 0x53) {
9406 delta
= dis_SSE_E_to_G_unary_all( sorb
, delta
+2,
9407 "rcpps", Iop_RecipEst32Fx4
);
9408 goto decode_success
;
9411 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
9412 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x53) {
9414 delta
= dis_SSE_E_to_G_unary_lo32( sorb
, delta
+3,
9415 "rcpss", Iop_RecipEst32F0x4
);
9416 goto decode_success
;
9419 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
9420 if (insn
[0] == 0x0F && insn
[1] == 0x52) {
9422 delta
= dis_SSE_E_to_G_unary_all( sorb
, delta
+2,
9423 "rsqrtps", Iop_RSqrtEst32Fx4
);
9424 goto decode_success
;
9427 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
9428 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x52) {
9430 delta
= dis_SSE_E_to_G_unary_lo32( sorb
, delta
+3,
9431 "rsqrtss", Iop_RSqrtEst32F0x4
);
9432 goto decode_success
;
9435 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
9436 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xC6) {
9439 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
9440 sV
= newTemp(Ity_V128
);
9441 dV
= newTemp(Ity_V128
);
9442 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
9444 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
9446 if (epartIsReg(modrm
)) {
9447 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
9448 select
= (Int
)insn
[3];
9450 DIP("shufps $%d,%s,%s\n", select
,
9451 nameXMMReg(eregOfRM(modrm
)),
9452 nameXMMReg(gregOfRM(modrm
)));
9454 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9455 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
9456 select
= (Int
)insn
[2+alen
];
9458 DIP("shufps $%d,%s,%s\n", select
,
9460 nameXMMReg(gregOfRM(modrm
)));
9463 breakup128to32s( dV
, &d3
, &d2
, &d1
, &d0
);
9464 breakup128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
9466 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
9467 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
9471 mk128from32s( SELS((select
>>6)&3), SELS((select
>>4)&3),
9472 SELD((select
>>2)&3), SELD((select
>>0)&3) )
9478 goto decode_success
;
9481 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
9482 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x51) {
9483 delta
= dis_SSE_E_to_G_unary_all( sorb
, delta
+2,
9484 "sqrtps", Iop_Sqrt32Fx4
);
9485 goto decode_success
;
9488 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
9489 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x51) {
9491 delta
= dis_SSE_E_to_G_unary_lo32( sorb
, delta
+3,
9492 "sqrtss", Iop_Sqrt32F0x4
);
9493 goto decode_success
;
9496 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
9497 if (insn
[0] == 0x0F && insn
[1] == 0xAE
9498 && !epartIsReg(insn
[2]) && gregOfRM(insn
[2]) == 3) {
9499 modrm
= getIByte(delta
+2);
9501 vassert(!epartIsReg(modrm
));
9503 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9506 /* Fake up a native SSE mxcsr word. The only thing it depends
9507 on is SSEROUND[1:0], so call a clean helper to cook it up.
9509 /* UInt x86h_create_mxcsr ( UInt sseround ) */
9510 DIP("stmxcsr %s\n", dis_buf
);
9511 storeLE( mkexpr(addr
),
9514 "x86g_create_mxcsr", &x86g_create_mxcsr
,
9515 mkIRExprVec_1( get_sse_roundingmode() )
9518 goto decode_success
;
9521 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
9522 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x5C) {
9523 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "subps", Iop_Sub32Fx4
);
9524 goto decode_success
;
9527 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
9528 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x5C) {
9530 delta
= dis_SSE_E_to_G_lo32( sorb
, delta
+3, "subss", Iop_Sub32F0x4
);
9531 goto decode_success
;
9534 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
9535 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
9536 /* These just appear to be special cases of SHUFPS */
9537 if (sz
== 4 && insn
[0] == 0x0F && (insn
[1] == 0x15 || insn
[1] == 0x14)) {
9539 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
9540 Bool hi
= toBool(insn
[1] == 0x15);
9541 sV
= newTemp(Ity_V128
);
9542 dV
= newTemp(Ity_V128
);
9543 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
9545 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
9547 if (epartIsReg(modrm
)) {
9548 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
9550 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
9551 nameXMMReg(eregOfRM(modrm
)),
9552 nameXMMReg(gregOfRM(modrm
)));
9554 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9555 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
9557 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
9559 nameXMMReg(gregOfRM(modrm
)));
9562 breakup128to32s( dV
, &d3
, &d2
, &d1
, &d0
);
9563 breakup128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
9566 putXMMReg( gregOfRM(modrm
), mk128from32s( s3
, d3
, s2
, d2
) );
9568 putXMMReg( gregOfRM(modrm
), mk128from32s( s1
, d1
, s0
, d0
) );
9571 goto decode_success
;
9574 /* 0F 57 = XORPS -- G = G xor E */
9575 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x57) {
9576 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "xorps", Iop_XorV128
);
9577 goto decode_success
;
9580 /* ---------------------------------------------------- */
9581 /* --- end of the SSE decoder. --- */
9582 /* ---------------------------------------------------- */
9584 /* ---------------------------------------------------- */
9585 /* --- start of the SSE2 decoder. --- */
9586 /* ---------------------------------------------------- */
9588 /* Skip parts of the decoder which don't apply given the stated
9589 guest subarchitecture. */
9590 if (0 == (archinfo
->hwcaps
& VEX_HWCAPS_X86_SSE2
))
9591 goto after_sse_decoders
; /* no SSE2 capabilities */
9593 insn
= &guest_code
[delta
];
9595 /* 66 0F 58 = ADDPD -- add 32Fx4 from R/M to R */
9596 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x58) {
9597 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "addpd", Iop_Add64Fx2
);
9598 goto decode_success
;
9601 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
9602 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x58) {
9604 delta
= dis_SSE_E_to_G_lo64( sorb
, delta
+3, "addsd", Iop_Add64F0x2
);
9605 goto decode_success
;
9608 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
9609 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x55) {
9610 delta
= dis_SSE_E_to_G_all_invG( sorb
, delta
+2, "andnpd", Iop_AndV128
);
9611 goto decode_success
;
9614 /* 66 0F 54 = ANDPD -- G = G and E */
9615 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x54) {
9616 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "andpd", Iop_AndV128
);
9617 goto decode_success
;
9620 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
9621 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xC2) {
9622 delta
= dis_SSEcmp_E_to_G( sorb
, delta
+2, "cmppd", True
, 8 );
9623 goto decode_success
;
9626 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
9627 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0xC2) {
9629 delta
= dis_SSEcmp_E_to_G( sorb
, delta
+3, "cmpsd", False
, 8 );
9630 goto decode_success
;
9633 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
9634 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
9635 if (sz
== 2 && insn
[0] == 0x0F && (insn
[1] == 0x2F || insn
[1] == 0x2E)) {
9636 IRTemp argL
= newTemp(Ity_F64
);
9637 IRTemp argR
= newTemp(Ity_F64
);
9638 modrm
= getIByte(delta
+2);
9639 if (epartIsReg(modrm
)) {
9640 assign( argR
, getXMMRegLane64F( eregOfRM(modrm
), 0/*lowest lane*/ ) );
9642 DIP("[u]comisd %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9643 nameXMMReg(gregOfRM(modrm
)) );
9645 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9646 assign( argR
, loadLE(Ity_F64
, mkexpr(addr
)) );
9648 DIP("[u]comisd %s,%s\n", dis_buf
,
9649 nameXMMReg(gregOfRM(modrm
)) );
9651 assign( argL
, getXMMRegLane64F( gregOfRM(modrm
), 0/*lowest lane*/ ) );
9653 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
9654 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
9658 binop(Iop_CmpF64
, mkexpr(argL
), mkexpr(argR
)),
9661 /* Set NDEP even though it isn't used. This makes redundant-PUT
9662 elimination of previous stores to this field work better. */
9663 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
9664 goto decode_success
;
9667 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
9669 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0xE6) {
9670 IRTemp arg64
= newTemp(Ity_I64
);
9673 modrm
= getIByte(delta
+3);
9674 if (epartIsReg(modrm
)) {
9675 assign( arg64
, getXMMRegLane64(eregOfRM(modrm
), 0) );
9677 DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9678 nameXMMReg(gregOfRM(modrm
)));
9680 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
9681 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
9683 DIP("cvtdq2pd %s,%s\n", dis_buf
,
9684 nameXMMReg(gregOfRM(modrm
)) );
9689 unop(Iop_I32StoF64
, unop(Iop_64to32
, mkexpr(arg64
)))
9694 unop(Iop_I32StoF64
, unop(Iop_64HIto32
, mkexpr(arg64
)))
9697 goto decode_success
;
9700 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
9702 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x5B) {
9703 IRTemp argV
= newTemp(Ity_V128
);
9704 IRTemp rmode
= newTemp(Ity_I32
);
9706 modrm
= getIByte(delta
+2);
9707 if (epartIsReg(modrm
)) {
9708 assign( argV
, getXMMReg(eregOfRM(modrm
)) );
9710 DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9711 nameXMMReg(gregOfRM(modrm
)));
9713 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9714 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
9716 DIP("cvtdq2ps %s,%s\n", dis_buf
,
9717 nameXMMReg(gregOfRM(modrm
)) );
9720 assign( rmode
, get_sse_roundingmode() );
9721 breakup128to32s( argV
, &t3
, &t2
, &t1
, &t0
);
9723 # define CVT(_t) binop( Iop_F64toF32, \
9725 unop(Iop_I32StoF64,mkexpr(_t)))
9727 putXMMRegLane32F( gregOfRM(modrm
), 3, CVT(t3
) );
9728 putXMMRegLane32F( gregOfRM(modrm
), 2, CVT(t2
) );
9729 putXMMRegLane32F( gregOfRM(modrm
), 1, CVT(t1
) );
9730 putXMMRegLane32F( gregOfRM(modrm
), 0, CVT(t0
) );
9734 goto decode_success
;
9737 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
9738 lo half xmm(G), and zero upper half */
9739 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0xE6) {
9740 IRTemp argV
= newTemp(Ity_V128
);
9741 IRTemp rmode
= newTemp(Ity_I32
);
9744 modrm
= getIByte(delta
+3);
9745 if (epartIsReg(modrm
)) {
9746 assign( argV
, getXMMReg(eregOfRM(modrm
)) );
9748 DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9749 nameXMMReg(gregOfRM(modrm
)));
9751 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
9752 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
9754 DIP("cvtpd2dq %s,%s\n", dis_buf
,
9755 nameXMMReg(gregOfRM(modrm
)) );
9758 assign( rmode
, get_sse_roundingmode() );
9759 t0
= newTemp(Ity_F64
);
9760 t1
= newTemp(Ity_F64
);
9761 assign( t0
, unop(Iop_ReinterpI64asF64
,
9762 unop(Iop_V128to64
, mkexpr(argV
))) );
9763 assign( t1
, unop(Iop_ReinterpI64asF64
,
9764 unop(Iop_V128HIto64
, mkexpr(argV
))) );
9766 # define CVT(_t) binop( Iop_F64toI32S, \
9770 putXMMRegLane32( gregOfRM(modrm
), 3, mkU32(0) );
9771 putXMMRegLane32( gregOfRM(modrm
), 2, mkU32(0) );
9772 putXMMRegLane32( gregOfRM(modrm
), 1, CVT(t1
) );
9773 putXMMRegLane32( gregOfRM(modrm
), 0, CVT(t0
) );
9777 goto decode_success
;
9780 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
9781 I32 in mmx, according to prevailing SSE rounding mode */
9782 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
9783 I32 in mmx, rounding towards zero */
9784 if (sz
== 2 && insn
[0] == 0x0F && (insn
[1] == 0x2D || insn
[1] == 0x2C)) {
9785 IRTemp dst64
= newTemp(Ity_I64
);
9786 IRTemp rmode
= newTemp(Ity_I32
);
9787 IRTemp f64lo
= newTemp(Ity_F64
);
9788 IRTemp f64hi
= newTemp(Ity_F64
);
9789 Bool r2zero
= toBool(insn
[1] == 0x2C);
9792 modrm
= getIByte(delta
+2);
9794 if (epartIsReg(modrm
)) {
9796 assign(f64lo
, getXMMRegLane64F(eregOfRM(modrm
), 0));
9797 assign(f64hi
, getXMMRegLane64F(eregOfRM(modrm
), 1));
9798 DIP("cvt%spd2pi %s,%s\n", r2zero
? "t" : "",
9799 nameXMMReg(eregOfRM(modrm
)),
9800 nameMMXReg(gregOfRM(modrm
)));
9802 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9803 assign(f64lo
, loadLE(Ity_F64
, mkexpr(addr
)));
9804 assign(f64hi
, loadLE(Ity_F64
, binop( Iop_Add32
,
9808 DIP("cvt%spf2pi %s,%s\n", r2zero
? "t" : "",
9810 nameMMXReg(gregOfRM(modrm
)));
9814 assign(rmode
, mkU32((UInt
)Irrm_ZERO
) );
9816 assign( rmode
, get_sse_roundingmode() );
9821 binop( Iop_32HLto64
,
9822 binop( Iop_F64toI32S
, mkexpr(rmode
), mkexpr(f64hi
) ),
9823 binop( Iop_F64toI32S
, mkexpr(rmode
), mkexpr(f64lo
) )
9827 putMMXReg(gregOfRM(modrm
), mkexpr(dst64
));
9828 goto decode_success
;
9831 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
9832 lo half xmm(G), and zero upper half */
9833 /* Note, this is practically identical to CVTPD2DQ. It would have
9834 been nicer to merge them together, but the insn[] offsets differ
9836 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x5A) {
9837 IRTemp argV
= newTemp(Ity_V128
);
9838 IRTemp rmode
= newTemp(Ity_I32
);
9840 modrm
= getIByte(delta
+2);
9841 if (epartIsReg(modrm
)) {
9842 assign( argV
, getXMMReg(eregOfRM(modrm
)) );
9844 DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9845 nameXMMReg(gregOfRM(modrm
)));
9847 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9848 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
9850 DIP("cvtpd2ps %s,%s\n", dis_buf
,
9851 nameXMMReg(gregOfRM(modrm
)) );
9854 assign( rmode
, get_sse_roundingmode() );
9855 t0
= newTemp(Ity_F64
);
9856 t1
= newTemp(Ity_F64
);
9857 assign( t0
, unop(Iop_ReinterpI64asF64
,
9858 unop(Iop_V128to64
, mkexpr(argV
))) );
9859 assign( t1
, unop(Iop_ReinterpI64asF64
,
9860 unop(Iop_V128HIto64
, mkexpr(argV
))) );
9862 # define CVT(_t) binop( Iop_F64toF32, \
9866 putXMMRegLane32( gregOfRM(modrm
), 3, mkU32(0) );
9867 putXMMRegLane32( gregOfRM(modrm
), 2, mkU32(0) );
9868 putXMMRegLane32F( gregOfRM(modrm
), 1, CVT(t1
) );
9869 putXMMRegLane32F( gregOfRM(modrm
), 0, CVT(t0
) );
9873 goto decode_success
;
9876 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
9878 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x2A) {
9879 IRTemp arg64
= newTemp(Ity_I64
);
9881 modrm
= getIByte(delta
+2);
9882 if (epartIsReg(modrm
)) {
9883 /* Only switch to MMX mode if the source is a MMX register.
9884 This is inconsistent with all other instructions which
9885 convert between XMM and (M64 or MMX), which always switch
9886 to MMX mode even if 64-bit operand is M64 and not MMX. At
9887 least, that's what the Intel docs seem to me to say.
9890 assign( arg64
, getMMXReg(eregOfRM(modrm
)) );
9892 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
9893 nameXMMReg(gregOfRM(modrm
)));
9895 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9896 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
9898 DIP("cvtpi2pd %s,%s\n", dis_buf
,
9899 nameXMMReg(gregOfRM(modrm
)) );
9904 unop(Iop_I32StoF64
, unop(Iop_64to32
, mkexpr(arg64
)) )
9909 unop(Iop_I32StoF64
, unop(Iop_64HIto32
, mkexpr(arg64
)) )
9912 goto decode_success
;
9915 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
9917 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x5B) {
9918 IRTemp argV
= newTemp(Ity_V128
);
9919 IRTemp rmode
= newTemp(Ity_I32
);
9921 modrm
= getIByte(delta
+2);
9922 if (epartIsReg(modrm
)) {
9923 assign( argV
, getXMMReg(eregOfRM(modrm
)) );
9925 DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9926 nameXMMReg(gregOfRM(modrm
)));
9928 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9929 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
9931 DIP("cvtps2dq %s,%s\n", dis_buf
,
9932 nameXMMReg(gregOfRM(modrm
)) );
9935 assign( rmode
, get_sse_roundingmode() );
9936 breakup128to32s( argV
, &t3
, &t2
, &t1
, &t0
);
9938 /* This is less than ideal. If it turns out to be a performance
9939 bottleneck it can be improved. */
9941 binop( Iop_F64toI32S, \
9943 unop( Iop_F32toF64, \
9944 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
9946 putXMMRegLane32( gregOfRM(modrm
), 3, CVT(t3
) );
9947 putXMMRegLane32( gregOfRM(modrm
), 2, CVT(t2
) );
9948 putXMMRegLane32( gregOfRM(modrm
), 1, CVT(t1
) );
9949 putXMMRegLane32( gregOfRM(modrm
), 0, CVT(t0
) );
9953 goto decode_success
;
9956 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
9958 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0x5A) {
9959 IRTemp f32lo
= newTemp(Ity_F32
);
9960 IRTemp f32hi
= newTemp(Ity_F32
);
9962 modrm
= getIByte(delta
+2);
9963 if (epartIsReg(modrm
)) {
9964 assign( f32lo
, getXMMRegLane32F(eregOfRM(modrm
), 0) );
9965 assign( f32hi
, getXMMRegLane32F(eregOfRM(modrm
), 1) );
9967 DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
9968 nameXMMReg(gregOfRM(modrm
)));
9970 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
9971 assign( f32lo
, loadLE(Ity_F32
, mkexpr(addr
)) );
9972 assign( f32hi
, loadLE(Ity_F32
,
9973 binop(Iop_Add32
,mkexpr(addr
),mkU32(4))) );
9975 DIP("cvtps2pd %s,%s\n", dis_buf
,
9976 nameXMMReg(gregOfRM(modrm
)) );
9979 putXMMRegLane64F( gregOfRM(modrm
), 1,
9980 unop(Iop_F32toF64
, mkexpr(f32hi
)) );
9981 putXMMRegLane64F( gregOfRM(modrm
), 0,
9982 unop(Iop_F32toF64
, mkexpr(f32lo
)) );
9984 goto decode_success
;
9987 /* F2 0F 2D = CVTSD2SI -- convert F64 in mem/low half xmm to
9988 I32 in ireg, according to prevailing SSE rounding mode */
9989 /* F2 0F 2C = CVTTSD2SI -- convert F64 in mem/low half xmm to
9990 I32 in ireg, rounding towards zero */
9991 if (insn
[0] == 0xF2 && insn
[1] == 0x0F
9992 && (insn
[2] == 0x2D || insn
[2] == 0x2C)) {
9993 IRTemp rmode
= newTemp(Ity_I32
);
9994 IRTemp f64lo
= newTemp(Ity_F64
);
9995 Bool r2zero
= toBool(insn
[2] == 0x2C);
9998 modrm
= getIByte(delta
+3);
9999 if (epartIsReg(modrm
)) {
10001 assign(f64lo
, getXMMRegLane64F(eregOfRM(modrm
), 0));
10002 DIP("cvt%ssd2si %s,%s\n", r2zero
? "t" : "",
10003 nameXMMReg(eregOfRM(modrm
)),
10004 nameIReg(4, gregOfRM(modrm
)));
10006 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10007 assign(f64lo
, loadLE(Ity_F64
, mkexpr(addr
)));
10009 DIP("cvt%ssd2si %s,%s\n", r2zero
? "t" : "",
10011 nameIReg(4, gregOfRM(modrm
)));
10015 assign( rmode
, mkU32((UInt
)Irrm_ZERO
) );
10017 assign( rmode
, get_sse_roundingmode() );
10020 putIReg(4, gregOfRM(modrm
),
10021 binop( Iop_F64toI32S
, mkexpr(rmode
), mkexpr(f64lo
)) );
10023 goto decode_success
;
10026 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
10027 low 1/4 xmm(G), according to prevailing SSE rounding mode */
10028 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x5A) {
10029 IRTemp rmode
= newTemp(Ity_I32
);
10030 IRTemp f64lo
= newTemp(Ity_F64
);
10033 modrm
= getIByte(delta
+3);
10034 if (epartIsReg(modrm
)) {
10036 assign(f64lo
, getXMMRegLane64F(eregOfRM(modrm
), 0));
10037 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
10038 nameXMMReg(gregOfRM(modrm
)));
10040 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10041 assign(f64lo
, loadLE(Ity_F64
, mkexpr(addr
)));
10043 DIP("cvtsd2ss %s,%s\n", dis_buf
,
10044 nameXMMReg(gregOfRM(modrm
)));
10047 assign( rmode
, get_sse_roundingmode() );
10049 gregOfRM(modrm
), 0,
10050 binop( Iop_F64toF32
, mkexpr(rmode
), mkexpr(f64lo
) )
10053 goto decode_success
;
10056 /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low
10058 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x2A) {
10059 IRTemp arg32
= newTemp(Ity_I32
);
10062 modrm
= getIByte(delta
+3);
10063 if (epartIsReg(modrm
)) {
10064 assign( arg32
, getIReg(4, eregOfRM(modrm
)) );
10066 DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm
)),
10067 nameXMMReg(gregOfRM(modrm
)));
10069 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10070 assign( arg32
, loadLE(Ity_I32
, mkexpr(addr
)) );
10072 DIP("cvtsi2sd %s,%s\n", dis_buf
,
10073 nameXMMReg(gregOfRM(modrm
)) );
10077 gregOfRM(modrm
), 0,
10078 unop(Iop_I32StoF64
, mkexpr(arg32
)) );
10080 goto decode_success
;
10083 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
10085 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x5A) {
10086 IRTemp f32lo
= newTemp(Ity_F32
);
10089 modrm
= getIByte(delta
+3);
10090 if (epartIsReg(modrm
)) {
10092 assign(f32lo
, getXMMRegLane32F(eregOfRM(modrm
), 0));
10093 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
10094 nameXMMReg(gregOfRM(modrm
)));
10096 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10097 assign(f32lo
, loadLE(Ity_F32
, mkexpr(addr
)));
10099 DIP("cvtss2sd %s,%s\n", dis_buf
,
10100 nameXMMReg(gregOfRM(modrm
)));
10103 putXMMRegLane64F( gregOfRM(modrm
), 0,
10104 unop( Iop_F32toF64
, mkexpr(f32lo
) ) );
10106 goto decode_success
;
10109 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
10110 lo half xmm(G), and zero upper half, rounding towards zero */
10111 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE6) {
10112 IRTemp argV
= newTemp(Ity_V128
);
10113 IRTemp rmode
= newTemp(Ity_I32
);
10115 modrm
= getIByte(delta
+2);
10116 if (epartIsReg(modrm
)) {
10117 assign( argV
, getXMMReg(eregOfRM(modrm
)) );
10119 DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
10120 nameXMMReg(gregOfRM(modrm
)));
10122 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10123 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10125 DIP("cvttpd2dq %s,%s\n", dis_buf
,
10126 nameXMMReg(gregOfRM(modrm
)) );
10129 assign( rmode
, mkU32((UInt
)Irrm_ZERO
) );
10131 t0
= newTemp(Ity_F64
);
10132 t1
= newTemp(Ity_F64
);
10133 assign( t0
, unop(Iop_ReinterpI64asF64
,
10134 unop(Iop_V128to64
, mkexpr(argV
))) );
10135 assign( t1
, unop(Iop_ReinterpI64asF64
,
10136 unop(Iop_V128HIto64
, mkexpr(argV
))) );
10138 # define CVT(_t) binop( Iop_F64toI32S, \
10142 putXMMRegLane32( gregOfRM(modrm
), 3, mkU32(0) );
10143 putXMMRegLane32( gregOfRM(modrm
), 2, mkU32(0) );
10144 putXMMRegLane32( gregOfRM(modrm
), 1, CVT(t1
) );
10145 putXMMRegLane32( gregOfRM(modrm
), 0, CVT(t0
) );
10149 goto decode_success
;
10152 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
10153 xmm(G), rounding towards zero */
10154 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x5B) {
10155 IRTemp argV
= newTemp(Ity_V128
);
10156 IRTemp rmode
= newTemp(Ity_I32
);
10159 modrm
= getIByte(delta
+3);
10160 if (epartIsReg(modrm
)) {
10161 assign( argV
, getXMMReg(eregOfRM(modrm
)) );
10163 DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
10164 nameXMMReg(gregOfRM(modrm
)));
10166 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10167 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10169 DIP("cvttps2dq %s,%s\n", dis_buf
,
10170 nameXMMReg(gregOfRM(modrm
)) );
10173 assign( rmode
, mkU32((UInt
)Irrm_ZERO
) );
10174 breakup128to32s( argV
, &t3
, &t2
, &t1
, &t0
);
10176 /* This is less than ideal. If it turns out to be a performance
10177 bottleneck it can be improved. */
10179 binop( Iop_F64toI32S, \
10181 unop( Iop_F32toF64, \
10182 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10184 putXMMRegLane32( gregOfRM(modrm
), 3, CVT(t3
) );
10185 putXMMRegLane32( gregOfRM(modrm
), 2, CVT(t2
) );
10186 putXMMRegLane32( gregOfRM(modrm
), 1, CVT(t1
) );
10187 putXMMRegLane32( gregOfRM(modrm
), 0, CVT(t0
) );
10191 goto decode_success
;
10194 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
10195 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x5E) {
10196 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "divpd", Iop_Div64Fx2
);
10197 goto decode_success
;
10200 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
10201 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x5E) {
10203 delta
= dis_SSE_E_to_G_lo64( sorb
, delta
+3, "divsd", Iop_Div64F0x2
);
10204 goto decode_success
;
10207 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
10208 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
10209 if (insn
[0] == 0x0F && insn
[1] == 0xAE
10210 && epartIsReg(insn
[2])
10211 && (gregOfRM(insn
[2]) == 5 || gregOfRM(insn
[2]) == 6)) {
10214 /* Insert a memory fence. It's sometimes important that these
10215 are carried through to the generated code. */
10216 stmt( IRStmt_MBE(Imbe_Fence
) );
10217 DIP("%sfence\n", gregOfRM(insn
[2])==5 ? "l" : "m");
10218 goto decode_success
;
10221 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
10222 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x5F) {
10223 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "maxpd", Iop_Max64Fx2
);
10224 goto decode_success
;
10227 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
10228 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x5F) {
10230 delta
= dis_SSE_E_to_G_lo64( sorb
, delta
+3, "maxsd", Iop_Max64F0x2
);
10231 goto decode_success
;
10234 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
10235 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x5D) {
10236 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "minpd", Iop_Min64Fx2
);
10237 goto decode_success
;
10240 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
10241 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x5D) {
10243 delta
= dis_SSE_E_to_G_lo64( sorb
, delta
+3, "minsd", Iop_Min64F0x2
);
10244 goto decode_success
;
10247 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
10248 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
10249 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
10250 if (sz
== 2 && insn
[0] == 0x0F
10251 && (insn
[1] == 0x28 || insn
[1] == 0x10 || insn
[1] == 0x6F)) {
10252 const HChar
* wot
= insn
[1]==0x28 ? "apd" :
10253 insn
[1]==0x10 ? "upd" : "dqa";
10254 modrm
= getIByte(delta
+2);
10255 if (epartIsReg(modrm
)) {
10256 putXMMReg( gregOfRM(modrm
),
10257 getXMMReg( eregOfRM(modrm
) ));
10258 DIP("mov%s %s,%s\n", wot
, nameXMMReg(eregOfRM(modrm
)),
10259 nameXMMReg(gregOfRM(modrm
)));
10262 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10263 if (insn
[1] == 0x28/*movapd*/ || insn
[1] == 0x6F/*movdqa*/)
10264 gen_SEGV_if_not_16_aligned( addr
);
10265 putXMMReg( gregOfRM(modrm
),
10266 loadLE(Ity_V128
, mkexpr(addr
)) );
10267 DIP("mov%s %s,%s\n", wot
, dis_buf
,
10268 nameXMMReg(gregOfRM(modrm
)));
10271 goto decode_success
;
10274 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
10275 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
10276 if (sz
== 2 && insn
[0] == 0x0F
10277 && (insn
[1] == 0x29 || insn
[1] == 0x11)) {
10278 const HChar
* wot
= insn
[1]==0x29 ? "apd" : "upd";
10279 modrm
= getIByte(delta
+2);
10280 if (epartIsReg(modrm
)) {
10281 /* fall through; awaiting test case */
10283 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10284 if (insn
[1] == 0x29/*movapd*/)
10285 gen_SEGV_if_not_16_aligned( addr
);
10286 storeLE( mkexpr(addr
), getXMMReg(gregOfRM(modrm
)) );
10287 DIP("mov%s %s,%s\n", wot
, nameXMMReg(gregOfRM(modrm
)),
10290 goto decode_success
;
10294 /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */
10295 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x6E) {
10296 modrm
= getIByte(delta
+2);
10297 if (epartIsReg(modrm
)) {
10301 unop( Iop_32UtoV128
, getIReg(4, eregOfRM(modrm
)) )
10303 DIP("movd %s, %s\n",
10304 nameIReg(4,eregOfRM(modrm
)), nameXMMReg(gregOfRM(modrm
)));
10306 addr
= disAMode( &alen
, sorb
, delta
+2, dis_buf
);
10310 unop( Iop_32UtoV128
,loadLE(Ity_I32
, mkexpr(addr
)) )
10312 DIP("movd %s, %s\n", dis_buf
, nameXMMReg(gregOfRM(modrm
)));
10314 goto decode_success
;
10317 /* 66 0F 7E = MOVD from xmm low 1/4 to r/m32. */
10318 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x7E) {
10319 modrm
= getIByte(delta
+2);
10320 if (epartIsReg(modrm
)) {
10322 putIReg( 4, eregOfRM(modrm
),
10323 getXMMRegLane32(gregOfRM(modrm
), 0) );
10324 DIP("movd %s, %s\n",
10325 nameXMMReg(gregOfRM(modrm
)), nameIReg(4,eregOfRM(modrm
)));
10327 addr
= disAMode( &alen
, sorb
, delta
+2, dis_buf
);
10329 storeLE( mkexpr(addr
),
10330 getXMMRegLane32(gregOfRM(modrm
), 0) );
10331 DIP("movd %s, %s\n", nameXMMReg(gregOfRM(modrm
)), dis_buf
);
10333 goto decode_success
;
10336 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
10337 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x7F) {
10338 modrm
= getIByte(delta
+2);
10339 if (epartIsReg(modrm
)) {
10341 putXMMReg( eregOfRM(modrm
),
10342 getXMMReg(gregOfRM(modrm
)) );
10343 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm
)),
10344 nameXMMReg(eregOfRM(modrm
)));
10346 addr
= disAMode( &alen
, sorb
, delta
+2, dis_buf
);
10348 gen_SEGV_if_not_16_aligned( addr
);
10349 storeLE( mkexpr(addr
), getXMMReg(gregOfRM(modrm
)) );
10350 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm
)), dis_buf
);
10352 goto decode_success
;
10355 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
10356 /* Unfortunately can't simply use the MOVDQA case since the
10357 prefix lengths are different (66 vs F3) */
10358 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x6F) {
10360 modrm
= getIByte(delta
+3);
10361 if (epartIsReg(modrm
)) {
10362 putXMMReg( gregOfRM(modrm
),
10363 getXMMReg( eregOfRM(modrm
) ));
10364 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
10365 nameXMMReg(gregOfRM(modrm
)));
10368 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10369 putXMMReg( gregOfRM(modrm
),
10370 loadLE(Ity_V128
, mkexpr(addr
)) );
10371 DIP("movdqu %s,%s\n", dis_buf
,
10372 nameXMMReg(gregOfRM(modrm
)));
10375 goto decode_success
;
10378 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
10379 /* Unfortunately can't simply use the MOVDQA case since the
10380 prefix lengths are different (66 vs F3) */
10381 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x7F) {
10383 modrm
= getIByte(delta
+3);
10384 if (epartIsReg(modrm
)) {
10386 putXMMReg( eregOfRM(modrm
),
10387 getXMMReg(gregOfRM(modrm
)) );
10388 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm
)),
10389 nameXMMReg(eregOfRM(modrm
)));
10391 addr
= disAMode( &alen
, sorb
, delta
+3, dis_buf
);
10393 storeLE( mkexpr(addr
), getXMMReg(gregOfRM(modrm
)) );
10394 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm
)), dis_buf
);
10396 goto decode_success
;
10399 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
10400 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0xD6) {
10402 modrm
= getIByte(delta
+3);
10403 if (epartIsReg(modrm
)) {
10405 putMMXReg( gregOfRM(modrm
),
10406 getXMMRegLane64( eregOfRM(modrm
), 0 ));
10407 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
10408 nameMMXReg(gregOfRM(modrm
)));
10410 goto decode_success
;
10412 /* fall through, apparently no mem case for this insn */
10416 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
10417 /* These seems identical to MOVHPS. This instruction encoding is
10418 completely crazy. */
10419 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x16) {
10420 modrm
= getIByte(delta
+2);
10421 if (epartIsReg(modrm
)) {
10422 /* fall through; apparently reg-reg is not possible */
10424 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10426 putXMMRegLane64( gregOfRM(modrm
), 1/*upper lane*/,
10427 loadLE(Ity_I64
, mkexpr(addr
)) );
10428 DIP("movhpd %s,%s\n", dis_buf
,
10429 nameXMMReg( gregOfRM(modrm
) ));
10430 goto decode_success
;
10434 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
10435 /* Again, this seems identical to MOVHPS. */
10436 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x17) {
10437 if (!epartIsReg(insn
[2])) {
10439 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
10441 storeLE( mkexpr(addr
),
10442 getXMMRegLane64( gregOfRM(insn
[2]),
10443 1/*upper lane*/ ) );
10444 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn
[2]) ),
10446 goto decode_success
;
10448 /* else fall through */
10451 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
10452 /* Identical to MOVLPS ? */
10453 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x12) {
10454 modrm
= getIByte(delta
+2);
10455 if (epartIsReg(modrm
)) {
10456 /* fall through; apparently reg-reg is not possible */
10458 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10460 putXMMRegLane64( gregOfRM(modrm
), 0/*lower lane*/,
10461 loadLE(Ity_I64
, mkexpr(addr
)) );
10462 DIP("movlpd %s, %s\n",
10463 dis_buf
, nameXMMReg( gregOfRM(modrm
) ));
10464 goto decode_success
;
10468 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
10469 /* Identical to MOVLPS ? */
10470 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x13) {
10471 if (!epartIsReg(insn
[2])) {
10473 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
10475 storeLE( mkexpr(addr
),
10476 getXMMRegLane64( gregOfRM(insn
[2]),
10477 0/*lower lane*/ ) );
10478 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn
[2]) ),
10480 goto decode_success
;
10482 /* else fall through */
10485 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
10486 2 lowest bits of ireg(G) */
10487 if (insn
[0] == 0x0F && insn
[1] == 0x50) {
10488 modrm
= getIByte(delta
+2);
10489 if (sz
== 2 && epartIsReg(modrm
)) {
10491 t0
= newTemp(Ity_I32
);
10492 t1
= newTemp(Ity_I32
);
10494 src
= eregOfRM(modrm
);
10495 assign( t0
, binop( Iop_And32
,
10496 binop(Iop_Shr32
, getXMMRegLane32(src
,1), mkU8(31)),
10498 assign( t1
, binop( Iop_And32
,
10499 binop(Iop_Shr32
, getXMMRegLane32(src
,3), mkU8(30)),
10501 putIReg(4, gregOfRM(modrm
),
10502 binop(Iop_Or32
, mkexpr(t0
), mkexpr(t1
))
10504 DIP("movmskpd %s,%s\n", nameXMMReg(src
),
10505 nameIReg(4, gregOfRM(modrm
)));
10506 goto decode_success
;
10508 /* else fall through */
10511 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
10512 if (insn
[0] == 0x0F && insn
[1] == 0xF7) {
10513 modrm
= getIByte(delta
+2);
10514 if (sz
== 2 && epartIsReg(modrm
)) {
10515 IRTemp regD
= newTemp(Ity_V128
);
10516 IRTemp mask
= newTemp(Ity_V128
);
10517 IRTemp olddata
= newTemp(Ity_V128
);
10518 IRTemp newdata
= newTemp(Ity_V128
);
10519 addr
= newTemp(Ity_I32
);
10521 assign( addr
, handleSegOverride( sorb
, getIReg(4, R_EDI
) ));
10522 assign( regD
, getXMMReg( gregOfRM(modrm
) ));
10524 /* Unfortunately can't do the obvious thing with SarN8x16
10525 here since that can't be re-emitted as SSE2 code - no such
10529 binop(Iop_64HLtoV128
,
10531 getXMMRegLane64( eregOfRM(modrm
), 1 ),
10534 getXMMRegLane64( eregOfRM(modrm
), 0 ),
10536 assign( olddata
, loadLE( Ity_V128
, mkexpr(addr
) ));
10544 unop(Iop_NotV128
, mkexpr(mask
)))) );
10545 storeLE( mkexpr(addr
), mkexpr(newdata
) );
10548 DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm
) ),
10549 nameXMMReg( gregOfRM(modrm
) ) );
10550 goto decode_success
;
10552 /* else fall through */
10555 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
10556 if (insn
[0] == 0x0F && insn
[1] == 0xE7) {
10557 modrm
= getIByte(delta
+2);
10558 if (sz
== 2 && !epartIsReg(modrm
)) {
10559 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10560 gen_SEGV_if_not_16_aligned( addr
);
10561 storeLE( mkexpr(addr
), getXMMReg(gregOfRM(modrm
)) );
10562 DIP("movntdq %s,%s\n", dis_buf
,
10563 nameXMMReg(gregOfRM(modrm
)));
10565 goto decode_success
;
10567 /* else fall through */
10570 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
10571 if (insn
[0] == 0x0F && insn
[1] == 0xC3) {
10573 modrm
= getIByte(delta
+2);
10574 if (!epartIsReg(modrm
)) {
10575 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10576 storeLE( mkexpr(addr
), getIReg(4, gregOfRM(modrm
)) );
10577 DIP("movnti %s,%s\n", dis_buf
,
10578 nameIReg(4, gregOfRM(modrm
)));
10580 goto decode_success
;
10582 /* else fall through */
10585 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
10586 or lo half xmm). */
10587 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD6) {
10588 modrm
= getIByte(delta
+2);
10589 if (epartIsReg(modrm
)) {
10590 /* fall through, awaiting test case */
10591 /* dst: lo half copied, hi half zeroed */
10593 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10594 storeLE( mkexpr(addr
),
10595 getXMMRegLane64( gregOfRM(modrm
), 0 ));
10596 DIP("movq %s,%s\n", nameXMMReg(gregOfRM(modrm
)), dis_buf
);
10598 goto decode_success
;
10602 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
10604 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0xD6) {
10606 modrm
= getIByte(delta
+3);
10607 if (epartIsReg(modrm
)) {
10609 putXMMReg( gregOfRM(modrm
),
10610 unop(Iop_64UtoV128
, getMMXReg( eregOfRM(modrm
) )) );
10611 DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
10612 nameXMMReg(gregOfRM(modrm
)));
10614 goto decode_success
;
10616 /* fall through, apparently no mem case for this insn */
10620 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
10621 G (lo half xmm). Upper half of G is zeroed out. */
10622 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
10623 G (lo half xmm). If E is mem, upper half of G is zeroed out.
10624 If E is reg, upper half of G is unchanged. */
10625 if ((insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x10)
10626 || (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x7E)) {
10628 modrm
= getIByte(delta
+3);
10629 if (epartIsReg(modrm
)) {
10630 putXMMRegLane64( gregOfRM(modrm
), 0,
10631 getXMMRegLane64( eregOfRM(modrm
), 0 ));
10632 if (insn
[0] == 0xF3/*MOVQ*/) {
10633 /* zero bits 127:64 */
10634 putXMMRegLane64( gregOfRM(modrm
), 1, mkU64(0) );
10636 DIP("movsd %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
10637 nameXMMReg(gregOfRM(modrm
)));
10640 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10641 /* zero bits 127:64 */
10642 putXMMRegLane64( gregOfRM(modrm
), 1, mkU64(0) );
10643 /* write bits 63:0 */
10644 putXMMRegLane64( gregOfRM(modrm
), 0,
10645 loadLE(Ity_I64
, mkexpr(addr
)) );
10646 DIP("movsd %s,%s\n", dis_buf
,
10647 nameXMMReg(gregOfRM(modrm
)));
10650 goto decode_success
;
10653 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
10654 or lo half xmm). */
10655 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x11) {
10657 modrm
= getIByte(delta
+3);
10658 if (epartIsReg(modrm
)) {
10659 putXMMRegLane64( eregOfRM(modrm
), 0,
10660 getXMMRegLane64( gregOfRM(modrm
), 0 ));
10661 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm
)),
10662 nameXMMReg(eregOfRM(modrm
)));
10665 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
10666 storeLE( mkexpr(addr
),
10667 getXMMRegLane64(gregOfRM(modrm
), 0) );
10668 DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm
)),
10672 goto decode_success
;
10675 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
10676 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x59) {
10677 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "mulpd", Iop_Mul64Fx2
);
10678 goto decode_success
;
10681 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
10682 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x59) {
10684 delta
= dis_SSE_E_to_G_lo64( sorb
, delta
+3, "mulsd", Iop_Mul64F0x2
);
10685 goto decode_success
;
10688 /* 66 0F 56 = ORPD -- G = G and E */
10689 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x56) {
10690 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "orpd", Iop_OrV128
);
10691 goto decode_success
;
10694 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
10695 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xC6) {
10697 IRTemp sV
= newTemp(Ity_V128
);
10698 IRTemp dV
= newTemp(Ity_V128
);
10699 IRTemp s1
= newTemp(Ity_I64
);
10700 IRTemp s0
= newTemp(Ity_I64
);
10701 IRTemp d1
= newTemp(Ity_I64
);
10702 IRTemp d0
= newTemp(Ity_I64
);
10705 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
10707 if (epartIsReg(modrm
)) {
10708 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
10709 select
= (Int
)insn
[3];
10711 DIP("shufpd $%d,%s,%s\n", select
,
10712 nameXMMReg(eregOfRM(modrm
)),
10713 nameXMMReg(gregOfRM(modrm
)));
10715 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10716 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10717 select
= (Int
)insn
[2+alen
];
10719 DIP("shufpd $%d,%s,%s\n", select
,
10721 nameXMMReg(gregOfRM(modrm
)));
10724 assign( d1
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
10725 assign( d0
, unop(Iop_V128to64
, mkexpr(dV
)) );
10726 assign( s1
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
10727 assign( s0
, unop(Iop_V128to64
, mkexpr(sV
)) );
10729 # define SELD(n) mkexpr((n)==0 ? d0 : d1)
10730 # define SELS(n) mkexpr((n)==0 ? s0 : s1)
10734 binop(Iop_64HLtoV128
, SELS((select
>>1)&1), SELD((select
>>0)&1) )
10740 goto decode_success
;
10743 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
10744 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x51) {
10745 delta
= dis_SSE_E_to_G_unary_all( sorb
, delta
+2,
10746 "sqrtpd", Iop_Sqrt64Fx2
);
10747 goto decode_success
;
10750 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
10751 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x51) {
10753 delta
= dis_SSE_E_to_G_unary_lo64( sorb
, delta
+3,
10754 "sqrtsd", Iop_Sqrt64F0x2
);
10755 goto decode_success
;
10758 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
10759 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x5C) {
10760 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "subpd", Iop_Sub64Fx2
);
10761 goto decode_success
;
10764 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
10765 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x5C) {
10767 delta
= dis_SSE_E_to_G_lo64( sorb
, delta
+3, "subsd", Iop_Sub64F0x2
);
10768 goto decode_success
;
10771 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
10772 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
10773 /* These just appear to be special cases of SHUFPS */
10774 if (sz
== 2 && insn
[0] == 0x0F && (insn
[1] == 0x15 || insn
[1] == 0x14)) {
10775 IRTemp s1
= newTemp(Ity_I64
);
10776 IRTemp s0
= newTemp(Ity_I64
);
10777 IRTemp d1
= newTemp(Ity_I64
);
10778 IRTemp d0
= newTemp(Ity_I64
);
10779 IRTemp sV
= newTemp(Ity_V128
);
10780 IRTemp dV
= newTemp(Ity_V128
);
10781 Bool hi
= toBool(insn
[1] == 0x15);
10784 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
10786 if (epartIsReg(modrm
)) {
10787 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
10789 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
10790 nameXMMReg(eregOfRM(modrm
)),
10791 nameXMMReg(gregOfRM(modrm
)));
10793 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
10794 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10796 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
10798 nameXMMReg(gregOfRM(modrm
)));
10801 assign( d1
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
10802 assign( d0
, unop(Iop_V128to64
, mkexpr(dV
)) );
10803 assign( s1
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
10804 assign( s0
, unop(Iop_V128to64
, mkexpr(sV
)) );
10807 putXMMReg( gregOfRM(modrm
),
10808 binop(Iop_64HLtoV128
, mkexpr(s1
), mkexpr(d1
)) );
10810 putXMMReg( gregOfRM(modrm
),
10811 binop(Iop_64HLtoV128
, mkexpr(s0
), mkexpr(d0
)) );
10814 goto decode_success
;
10817 /* 66 0F 57 = XORPD -- G = G and E */
10818 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x57) {
10819 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "xorpd", Iop_XorV128
);
10820 goto decode_success
;
10823 /* 66 0F 6B = PACKSSDW */
10824 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x6B) {
10825 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10827 Iop_QNarrowBin32Sto16Sx8
, True
);
10828 goto decode_success
;
10831 /* 66 0F 63 = PACKSSWB */
10832 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x63) {
10833 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10835 Iop_QNarrowBin16Sto8Sx16
, True
);
10836 goto decode_success
;
10839 /* 66 0F 67 = PACKUSWB */
10840 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x67) {
10841 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10843 Iop_QNarrowBin16Sto8Ux16
, True
);
10844 goto decode_success
;
10847 /* 66 0F FC = PADDB */
10848 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xFC) {
10849 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10850 "paddb", Iop_Add8x16
, False
);
10851 goto decode_success
;
10854 /* 66 0F FE = PADDD */
10855 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xFE) {
10856 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10857 "paddd", Iop_Add32x4
, False
);
10858 goto decode_success
;
10861 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
10862 /* 0F D4 = PADDQ -- add 64x1 */
10863 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xD4) {
10865 delta
= dis_MMXop_regmem_to_reg (
10866 sorb
, delta
+2, insn
[1], "paddq", False
);
10867 goto decode_success
;
10870 /* 66 0F D4 = PADDQ */
10871 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD4) {
10872 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10873 "paddq", Iop_Add64x2
, False
);
10874 goto decode_success
;
10877 /* 66 0F FD = PADDW */
10878 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xFD) {
10879 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10880 "paddw", Iop_Add16x8
, False
);
10881 goto decode_success
;
10884 /* 66 0F EC = PADDSB */
10885 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xEC) {
10886 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10887 "paddsb", Iop_QAdd8Sx16
, False
);
10888 goto decode_success
;
10891 /* 66 0F ED = PADDSW */
10892 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xED) {
10893 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10894 "paddsw", Iop_QAdd16Sx8
, False
);
10895 goto decode_success
;
10898 /* 66 0F DC = PADDUSB */
10899 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xDC) {
10900 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10901 "paddusb", Iop_QAdd8Ux16
, False
);
10902 goto decode_success
;
10905 /* 66 0F DD = PADDUSW */
10906 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xDD) {
10907 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10908 "paddusw", Iop_QAdd16Ux8
, False
);
10909 goto decode_success
;
10912 /* 66 0F DB = PAND */
10913 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xDB) {
10914 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "pand", Iop_AndV128
);
10915 goto decode_success
;
10918 /* 66 0F DF = PANDN */
10919 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xDF) {
10920 delta
= dis_SSE_E_to_G_all_invG( sorb
, delta
+2, "pandn", Iop_AndV128
);
10921 goto decode_success
;
10924 /* 66 0F E0 = PAVGB */
10925 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE0) {
10926 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10927 "pavgb", Iop_Avg8Ux16
, False
);
10928 goto decode_success
;
10931 /* 66 0F E3 = PAVGW */
10932 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE3) {
10933 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10934 "pavgw", Iop_Avg16Ux8
, False
);
10935 goto decode_success
;
10938 /* 66 0F 74 = PCMPEQB */
10939 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x74) {
10940 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10941 "pcmpeqb", Iop_CmpEQ8x16
, False
);
10942 goto decode_success
;
10945 /* 66 0F 76 = PCMPEQD */
10946 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x76) {
10947 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10948 "pcmpeqd", Iop_CmpEQ32x4
, False
);
10949 goto decode_success
;
10952 /* 66 0F 75 = PCMPEQW */
10953 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x75) {
10954 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10955 "pcmpeqw", Iop_CmpEQ16x8
, False
);
10956 goto decode_success
;
10959 /* 66 0F 64 = PCMPGTB */
10960 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x64) {
10961 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10962 "pcmpgtb", Iop_CmpGT8Sx16
, False
);
10963 goto decode_success
;
10966 /* 66 0F 66 = PCMPGTD */
10967 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x66) {
10968 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10969 "pcmpgtd", Iop_CmpGT32Sx4
, False
);
10970 goto decode_success
;
10973 /* 66 0F 65 = PCMPGTW */
10974 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x65) {
10975 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
10976 "pcmpgtw", Iop_CmpGT16Sx8
, False
);
10977 goto decode_success
;
10980 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
10981 zero-extend of it in ireg(G). */
10982 if (insn
[0] == 0x0F && insn
[1] == 0xC5) {
10984 if (sz
== 2 && epartIsReg(modrm
)) {
10985 t5
= newTemp(Ity_V128
);
10986 t4
= newTemp(Ity_I16
);
10987 assign(t5
, getXMMReg(eregOfRM(modrm
)));
10988 breakup128to32s( t5
, &t3
, &t2
, &t1
, &t0
);
10989 switch (insn
[3] & 7) {
10990 case 0: assign(t4
, unop(Iop_32to16
, mkexpr(t0
))); break;
10991 case 1: assign(t4
, unop(Iop_32HIto16
, mkexpr(t0
))); break;
10992 case 2: assign(t4
, unop(Iop_32to16
, mkexpr(t1
))); break;
10993 case 3: assign(t4
, unop(Iop_32HIto16
, mkexpr(t1
))); break;
10994 case 4: assign(t4
, unop(Iop_32to16
, mkexpr(t2
))); break;
10995 case 5: assign(t4
, unop(Iop_32HIto16
, mkexpr(t2
))); break;
10996 case 6: assign(t4
, unop(Iop_32to16
, mkexpr(t3
))); break;
10997 case 7: assign(t4
, unop(Iop_32HIto16
, mkexpr(t3
))); break;
10998 default: vassert(0); /*NOTREACHED*/
11000 putIReg(4, gregOfRM(modrm
), unop(Iop_16Uto32
, mkexpr(t4
)));
11001 DIP("pextrw $%d,%s,%s\n",
11002 (Int
)insn
[3], nameXMMReg(eregOfRM(modrm
)),
11003 nameIReg(4,gregOfRM(modrm
)));
11005 goto decode_success
;
11007 /* else fall through */
11010 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
11011 put it into the specified lane of xmm(G). */
11012 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xC4) {
11014 t4
= newTemp(Ity_I16
);
11017 if (epartIsReg(modrm
)) {
11018 assign(t4
, getIReg(2, eregOfRM(modrm
)));
11020 lane
= insn
[3+1-1];
11021 DIP("pinsrw $%d,%s,%s\n", lane
,
11022 nameIReg(2,eregOfRM(modrm
)),
11023 nameXMMReg(gregOfRM(modrm
)));
11025 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11027 lane
= insn
[3+alen
-1];
11028 assign(t4
, loadLE(Ity_I16
, mkexpr(addr
)));
11029 DIP("pinsrw $%d,%s,%s\n", lane
,
11031 nameXMMReg(gregOfRM(modrm
)));
11034 putXMMRegLane16( gregOfRM(modrm
), lane
& 7, mkexpr(t4
) );
11035 goto decode_success
;
11038 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
11039 E(xmm or mem) to G(xmm) */
11040 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF5) {
11041 IRTemp s1V
= newTemp(Ity_V128
);
11042 IRTemp s2V
= newTemp(Ity_V128
);
11043 IRTemp dV
= newTemp(Ity_V128
);
11044 IRTemp s1Hi
= newTemp(Ity_I64
);
11045 IRTemp s1Lo
= newTemp(Ity_I64
);
11046 IRTemp s2Hi
= newTemp(Ity_I64
);
11047 IRTemp s2Lo
= newTemp(Ity_I64
);
11048 IRTemp dHi
= newTemp(Ity_I64
);
11049 IRTemp dLo
= newTemp(Ity_I64
);
11051 if (epartIsReg(modrm
)) {
11052 assign( s1V
, getXMMReg(eregOfRM(modrm
)) );
11054 DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
11055 nameXMMReg(gregOfRM(modrm
)));
11057 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11058 assign( s1V
, loadLE(Ity_V128
, mkexpr(addr
)) );
11060 DIP("pmaddwd %s,%s\n", dis_buf
,
11061 nameXMMReg(gregOfRM(modrm
)));
11063 assign( s2V
, getXMMReg(gregOfRM(modrm
)) );
11064 assign( s1Hi
, unop(Iop_V128HIto64
, mkexpr(s1V
)) );
11065 assign( s1Lo
, unop(Iop_V128to64
, mkexpr(s1V
)) );
11066 assign( s2Hi
, unop(Iop_V128HIto64
, mkexpr(s2V
)) );
11067 assign( s2Lo
, unop(Iop_V128to64
, mkexpr(s2V
)) );
11068 assign( dHi
, mkIRExprCCall(
11069 Ity_I64
, 0/*regparms*/,
11070 "x86g_calculate_mmx_pmaddwd",
11071 &x86g_calculate_mmx_pmaddwd
,
11072 mkIRExprVec_2( mkexpr(s1Hi
), mkexpr(s2Hi
))
11074 assign( dLo
, mkIRExprCCall(
11075 Ity_I64
, 0/*regparms*/,
11076 "x86g_calculate_mmx_pmaddwd",
11077 &x86g_calculate_mmx_pmaddwd
,
11078 mkIRExprVec_2( mkexpr(s1Lo
), mkexpr(s2Lo
))
11080 assign( dV
, binop(Iop_64HLtoV128
, mkexpr(dHi
), mkexpr(dLo
))) ;
11081 putXMMReg(gregOfRM(modrm
), mkexpr(dV
));
11082 goto decode_success
;
11085 /* 66 0F EE = PMAXSW -- 16x8 signed max */
11086 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xEE) {
11087 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11088 "pmaxsw", Iop_Max16Sx8
, False
);
11089 goto decode_success
;
11092 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
11093 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xDE) {
11094 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11095 "pmaxub", Iop_Max8Ux16
, False
);
11096 goto decode_success
;
11099 /* 66 0F EA = PMINSW -- 16x8 signed min */
11100 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xEA) {
11101 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11102 "pminsw", Iop_Min16Sx8
, False
);
11103 goto decode_success
;
11106 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
11107 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xDA) {
11108 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11109 "pminub", Iop_Min8Ux16
, False
);
11110 goto decode_success
;
11113 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes
11114 in xmm(E), turn them into a byte, and put zero-extend of it in
11116 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD7) {
11118 if (epartIsReg(modrm
)) {
11119 t0
= newTemp(Ity_I64
);
11120 t1
= newTemp(Ity_I64
);
11121 assign(t0
, getXMMRegLane64(eregOfRM(modrm
), 0));
11122 assign(t1
, getXMMRegLane64(eregOfRM(modrm
), 1));
11123 t5
= newTemp(Ity_I32
);
11127 unop(Iop_GetMSBs8x8
, mkexpr(t1
)),
11128 unop(Iop_GetMSBs8x8
, mkexpr(t0
)))));
11129 putIReg(4, gregOfRM(modrm
), mkexpr(t5
));
11130 DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
11131 nameIReg(4,gregOfRM(modrm
)));
11133 goto decode_success
;
11135 /* else fall through */
11138 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
11139 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE4) {
11140 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11141 "pmulhuw", Iop_MulHi16Ux8
, False
);
11142 goto decode_success
;
11145 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
11146 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE5) {
11147 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11148 "pmulhw", Iop_MulHi16Sx8
, False
);
11149 goto decode_success
;
11152 /* 66 0F D5 = PMULHL -- 16x8 multiply */
11153 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD5) {
11154 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11155 "pmullw", Iop_Mul16x8
, False
);
11156 goto decode_success
;
11159 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
11160 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
11161 0 to form 64-bit result */
11162 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xF4) {
11163 IRTemp sV
= newTemp(Ity_I64
);
11164 IRTemp dV
= newTemp(Ity_I64
);
11165 t1
= newTemp(Ity_I32
);
11166 t0
= newTemp(Ity_I32
);
11170 assign( dV
, getMMXReg(gregOfRM(modrm
)) );
11172 if (epartIsReg(modrm
)) {
11173 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
11175 DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
11176 nameMMXReg(gregOfRM(modrm
)));
11178 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11179 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
11181 DIP("pmuludq %s,%s\n", dis_buf
,
11182 nameMMXReg(gregOfRM(modrm
)));
11185 assign( t0
, unop(Iop_64to32
, mkexpr(dV
)) );
11186 assign( t1
, unop(Iop_64to32
, mkexpr(sV
)) );
11187 putMMXReg( gregOfRM(modrm
),
11188 binop( Iop_MullU32
, mkexpr(t0
), mkexpr(t1
) ) );
11189 goto decode_success
;
11192 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
11193 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
11195 /* This is a really poor translation -- could be improved if
11196 performance critical */
11197 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF4) {
11199 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
11200 sV
= newTemp(Ity_V128
);
11201 dV
= newTemp(Ity_V128
);
11202 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
11203 t1
= newTemp(Ity_I64
);
11204 t0
= newTemp(Ity_I64
);
11206 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
11208 if (epartIsReg(modrm
)) {
11209 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
11211 DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
11212 nameXMMReg(gregOfRM(modrm
)));
11214 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11215 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11217 DIP("pmuludq %s,%s\n", dis_buf
,
11218 nameXMMReg(gregOfRM(modrm
)));
11221 breakup128to32s( dV
, &d3
, &d2
, &d1
, &d0
);
11222 breakup128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
11224 assign( t0
, binop( Iop_MullU32
, mkexpr(d0
), mkexpr(s0
)) );
11225 putXMMRegLane64( gregOfRM(modrm
), 0, mkexpr(t0
) );
11226 assign( t1
, binop( Iop_MullU32
, mkexpr(d2
), mkexpr(s2
)) );
11227 putXMMRegLane64( gregOfRM(modrm
), 1, mkexpr(t1
) );
11228 goto decode_success
;
11231 /* 66 0F EB = POR */
11232 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xEB) {
11233 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "por", Iop_OrV128
);
11234 goto decode_success
;
11237 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
11238 from E(xmm or mem) to G(xmm) */
11239 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF6) {
11240 IRTemp s1V
= newTemp(Ity_V128
);
11241 IRTemp s2V
= newTemp(Ity_V128
);
11242 IRTemp dV
= newTemp(Ity_V128
);
11243 IRTemp s1Hi
= newTemp(Ity_I64
);
11244 IRTemp s1Lo
= newTemp(Ity_I64
);
11245 IRTemp s2Hi
= newTemp(Ity_I64
);
11246 IRTemp s2Lo
= newTemp(Ity_I64
);
11247 IRTemp dHi
= newTemp(Ity_I64
);
11248 IRTemp dLo
= newTemp(Ity_I64
);
11250 if (epartIsReg(modrm
)) {
11251 assign( s1V
, getXMMReg(eregOfRM(modrm
)) );
11253 DIP("psadbw %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
11254 nameXMMReg(gregOfRM(modrm
)));
11256 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11257 assign( s1V
, loadLE(Ity_V128
, mkexpr(addr
)) );
11259 DIP("psadbw %s,%s\n", dis_buf
,
11260 nameXMMReg(gregOfRM(modrm
)));
11262 assign( s2V
, getXMMReg(gregOfRM(modrm
)) );
11263 assign( s1Hi
, unop(Iop_V128HIto64
, mkexpr(s1V
)) );
11264 assign( s1Lo
, unop(Iop_V128to64
, mkexpr(s1V
)) );
11265 assign( s2Hi
, unop(Iop_V128HIto64
, mkexpr(s2V
)) );
11266 assign( s2Lo
, unop(Iop_V128to64
, mkexpr(s2V
)) );
11267 assign( dHi
, mkIRExprCCall(
11268 Ity_I64
, 0/*regparms*/,
11269 "x86g_calculate_mmx_psadbw",
11270 &x86g_calculate_mmx_psadbw
,
11271 mkIRExprVec_2( mkexpr(s1Hi
), mkexpr(s2Hi
))
11273 assign( dLo
, mkIRExprCCall(
11274 Ity_I64
, 0/*regparms*/,
11275 "x86g_calculate_mmx_psadbw",
11276 &x86g_calculate_mmx_psadbw
,
11277 mkIRExprVec_2( mkexpr(s1Lo
), mkexpr(s2Lo
))
11279 assign( dV
, binop(Iop_64HLtoV128
, mkexpr(dHi
), mkexpr(dLo
))) ;
11280 putXMMReg(gregOfRM(modrm
), mkexpr(dV
));
11281 goto decode_success
;
11284 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
11285 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x70) {
11287 IRTemp sV
, dV
, s3
, s2
, s1
, s0
;
11288 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11289 sV
= newTemp(Ity_V128
);
11290 dV
= newTemp(Ity_V128
);
11292 if (epartIsReg(modrm
)) {
11293 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
11294 order
= (Int
)insn
[3];
11296 DIP("pshufd $%d,%s,%s\n", order
,
11297 nameXMMReg(eregOfRM(modrm
)),
11298 nameXMMReg(gregOfRM(modrm
)));
11300 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11301 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11302 order
= (Int
)insn
[2+alen
];
11304 DIP("pshufd $%d,%s,%s\n", order
,
11306 nameXMMReg(gregOfRM(modrm
)));
11308 breakup128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
11311 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11313 mk128from32s( SEL((order
>>6)&3), SEL((order
>>4)&3),
11314 SEL((order
>>2)&3), SEL((order
>>0)&3) )
11316 putXMMReg(gregOfRM(modrm
), mkexpr(dV
));
11318 goto decode_success
;
11321 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
11322 mem) to G(xmm), and copy lower half */
11323 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0x70) {
11325 IRTemp sVhi
, dVhi
, sV
, dV
, s3
, s2
, s1
, s0
;
11326 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11327 sV
= newTemp(Ity_V128
);
11328 dV
= newTemp(Ity_V128
);
11329 sVhi
= newTemp(Ity_I64
);
11330 dVhi
= newTemp(Ity_I64
);
11332 if (epartIsReg(modrm
)) {
11333 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
11334 order
= (Int
)insn
[4];
11336 DIP("pshufhw $%d,%s,%s\n", order
,
11337 nameXMMReg(eregOfRM(modrm
)),
11338 nameXMMReg(gregOfRM(modrm
)));
11340 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
11341 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11342 order
= (Int
)insn
[3+alen
];
11344 DIP("pshufhw $%d,%s,%s\n", order
,
11346 nameXMMReg(gregOfRM(modrm
)));
11348 assign( sVhi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
11349 breakup64to16s( sVhi
, &s3
, &s2
, &s1
, &s0
);
11352 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11354 mk64from16s( SEL((order
>>6)&3), SEL((order
>>4)&3),
11355 SEL((order
>>2)&3), SEL((order
>>0)&3) )
11357 assign(dV
, binop( Iop_64HLtoV128
,
11359 unop(Iop_V128to64
, mkexpr(sV
))) );
11360 putXMMReg(gregOfRM(modrm
), mkexpr(dV
));
11362 goto decode_success
;
11365 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
11366 mem) to G(xmm), and copy upper half */
11367 if (insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x70) {
11369 IRTemp sVlo
, dVlo
, sV
, dV
, s3
, s2
, s1
, s0
;
11370 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11371 sV
= newTemp(Ity_V128
);
11372 dV
= newTemp(Ity_V128
);
11373 sVlo
= newTemp(Ity_I64
);
11374 dVlo
= newTemp(Ity_I64
);
11376 if (epartIsReg(modrm
)) {
11377 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
11378 order
= (Int
)insn
[4];
11380 DIP("pshuflw $%d,%s,%s\n", order
,
11381 nameXMMReg(eregOfRM(modrm
)),
11382 nameXMMReg(gregOfRM(modrm
)));
11384 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
11385 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11386 order
= (Int
)insn
[3+alen
];
11388 DIP("pshuflw $%d,%s,%s\n", order
,
11390 nameXMMReg(gregOfRM(modrm
)));
11392 assign( sVlo
, unop(Iop_V128to64
, mkexpr(sV
)) );
11393 breakup64to16s( sVlo
, &s3
, &s2
, &s1
, &s0
);
11396 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11398 mk64from16s( SEL((order
>>6)&3), SEL((order
>>4)&3),
11399 SEL((order
>>2)&3), SEL((order
>>0)&3) )
11401 assign(dV
, binop( Iop_64HLtoV128
,
11402 unop(Iop_V128HIto64
, mkexpr(sV
)),
11404 putXMMReg(gregOfRM(modrm
), mkexpr(dV
));
11406 goto decode_success
;
11409 /* 66 0F 72 /6 ib = PSLLD by immediate */
11410 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x72
11411 && epartIsReg(insn
[2])
11412 && gregOfRM(insn
[2]) == 6) {
11413 delta
= dis_SSE_shiftE_imm( delta
+2, "pslld", Iop_ShlN32x4
);
11414 goto decode_success
;
11417 /* 66 0F F2 = PSLLD by E */
11418 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF2) {
11419 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "pslld", Iop_ShlN32x4
);
11420 goto decode_success
;
11423 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
11424 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x73
11425 && epartIsReg(insn
[2])
11426 && gregOfRM(insn
[2]) == 7) {
11427 IRTemp sV
, dV
, hi64
, lo64
, hi64r
, lo64r
;
11428 Int imm
= (Int
)insn
[3];
11429 Int reg
= eregOfRM(insn
[2]);
11430 DIP("pslldq $%d,%s\n", imm
, nameXMMReg(reg
));
11431 vassert(imm
>= 0 && imm
<= 255);
11434 sV
= newTemp(Ity_V128
);
11435 dV
= newTemp(Ity_V128
);
11436 hi64
= newTemp(Ity_I64
);
11437 lo64
= newTemp(Ity_I64
);
11438 hi64r
= newTemp(Ity_I64
);
11439 lo64r
= newTemp(Ity_I64
);
11442 putXMMReg(reg
, mkV128(0x0000));
11443 goto decode_success
;
11446 assign( sV
, getXMMReg(reg
) );
11447 assign( hi64
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
11448 assign( lo64
, unop(Iop_V128to64
, mkexpr(sV
)) );
11451 assign( lo64r
, mkexpr(lo64
) );
11452 assign( hi64r
, mkexpr(hi64
) );
11456 assign( lo64r
, mkU64(0) );
11457 assign( hi64r
, mkexpr(lo64
) );
11461 assign( lo64r
, mkU64(0) );
11462 assign( hi64r
, binop( Iop_Shl64
,
11464 mkU8( 8*(imm
-8) ) ));
11466 assign( lo64r
, binop( Iop_Shl64
,
11471 binop(Iop_Shl64
, mkexpr(hi64
),
11473 binop(Iop_Shr64
, mkexpr(lo64
),
11474 mkU8(8 * (8 - imm
)) )
11478 assign( dV
, binop(Iop_64HLtoV128
, mkexpr(hi64r
), mkexpr(lo64r
)) );
11479 putXMMReg(reg
, mkexpr(dV
));
11480 goto decode_success
;
11483 /* 66 0F 73 /6 ib = PSLLQ by immediate */
11484 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x73
11485 && epartIsReg(insn
[2])
11486 && gregOfRM(insn
[2]) == 6) {
11487 delta
= dis_SSE_shiftE_imm( delta
+2, "psllq", Iop_ShlN64x2
);
11488 goto decode_success
;
11491 /* 66 0F F3 = PSLLQ by E */
11492 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF3) {
11493 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "psllq", Iop_ShlN64x2
);
11494 goto decode_success
;
11497 /* 66 0F 71 /6 ib = PSLLW by immediate */
11498 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x71
11499 && epartIsReg(insn
[2])
11500 && gregOfRM(insn
[2]) == 6) {
11501 delta
= dis_SSE_shiftE_imm( delta
+2, "psllw", Iop_ShlN16x8
);
11502 goto decode_success
;
11505 /* 66 0F F1 = PSLLW by E */
11506 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF1) {
11507 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "psllw", Iop_ShlN16x8
);
11508 goto decode_success
;
11511 /* 66 0F 72 /4 ib = PSRAD by immediate */
11512 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x72
11513 && epartIsReg(insn
[2])
11514 && gregOfRM(insn
[2]) == 4) {
11515 delta
= dis_SSE_shiftE_imm( delta
+2, "psrad", Iop_SarN32x4
);
11516 goto decode_success
;
11519 /* 66 0F E2 = PSRAD by E */
11520 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE2) {
11521 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "psrad", Iop_SarN32x4
);
11522 goto decode_success
;
11525 /* 66 0F 71 /4 ib = PSRAW by immediate */
11526 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x71
11527 && epartIsReg(insn
[2])
11528 && gregOfRM(insn
[2]) == 4) {
11529 delta
= dis_SSE_shiftE_imm( delta
+2, "psraw", Iop_SarN16x8
);
11530 goto decode_success
;
11533 /* 66 0F E1 = PSRAW by E */
11534 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE1) {
11535 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "psraw", Iop_SarN16x8
);
11536 goto decode_success
;
11539 /* 66 0F 72 /2 ib = PSRLD by immediate */
11540 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x72
11541 && epartIsReg(insn
[2])
11542 && gregOfRM(insn
[2]) == 2) {
11543 delta
= dis_SSE_shiftE_imm( delta
+2, "psrld", Iop_ShrN32x4
);
11544 goto decode_success
;
11547 /* 66 0F D2 = PSRLD by E */
11548 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD2) {
11549 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "psrld", Iop_ShrN32x4
);
11550 goto decode_success
;
11553 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
11554 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x73
11555 && epartIsReg(insn
[2])
11556 && gregOfRM(insn
[2]) == 3) {
11557 IRTemp sV
, dV
, hi64
, lo64
, hi64r
, lo64r
;
11558 Int imm
= (Int
)insn
[3];
11559 Int reg
= eregOfRM(insn
[2]);
11560 DIP("psrldq $%d,%s\n", imm
, nameXMMReg(reg
));
11561 vassert(imm
>= 0 && imm
<= 255);
11564 sV
= newTemp(Ity_V128
);
11565 dV
= newTemp(Ity_V128
);
11566 hi64
= newTemp(Ity_I64
);
11567 lo64
= newTemp(Ity_I64
);
11568 hi64r
= newTemp(Ity_I64
);
11569 lo64r
= newTemp(Ity_I64
);
11572 putXMMReg(reg
, mkV128(0x0000));
11573 goto decode_success
;
11576 assign( sV
, getXMMReg(reg
) );
11577 assign( hi64
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
11578 assign( lo64
, unop(Iop_V128to64
, mkexpr(sV
)) );
11581 assign( lo64r
, mkexpr(lo64
) );
11582 assign( hi64r
, mkexpr(hi64
) );
11586 assign( hi64r
, mkU64(0) );
11587 assign( lo64r
, mkexpr(hi64
) );
11591 assign( hi64r
, mkU64(0) );
11592 assign( lo64r
, binop( Iop_Shr64
,
11594 mkU8( 8*(imm
-8) ) ));
11596 assign( hi64r
, binop( Iop_Shr64
,
11601 binop(Iop_Shr64
, mkexpr(lo64
),
11603 binop(Iop_Shl64
, mkexpr(hi64
),
11604 mkU8(8 * (8 - imm
)) )
11609 assign( dV
, binop(Iop_64HLtoV128
, mkexpr(hi64r
), mkexpr(lo64r
)) );
11610 putXMMReg(reg
, mkexpr(dV
));
11611 goto decode_success
;
11614 /* 66 0F 73 /2 ib = PSRLQ by immediate */
11615 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x73
11616 && epartIsReg(insn
[2])
11617 && gregOfRM(insn
[2]) == 2) {
11618 delta
= dis_SSE_shiftE_imm( delta
+2, "psrlq", Iop_ShrN64x2
);
11619 goto decode_success
;
11622 /* 66 0F D3 = PSRLQ by E */
11623 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD3) {
11624 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "psrlq", Iop_ShrN64x2
);
11625 goto decode_success
;
11628 /* 66 0F 71 /2 ib = PSRLW by immediate */
11629 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x71
11630 && epartIsReg(insn
[2])
11631 && gregOfRM(insn
[2]) == 2) {
11632 delta
= dis_SSE_shiftE_imm( delta
+2, "psrlw", Iop_ShrN16x8
);
11633 goto decode_success
;
11636 /* 66 0F D1 = PSRLW by E */
11637 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD1) {
11638 delta
= dis_SSE_shiftG_byE( sorb
, delta
+2, "psrlw", Iop_ShrN16x8
);
11639 goto decode_success
;
11642 /* 66 0F F8 = PSUBB */
11643 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF8) {
11644 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11645 "psubb", Iop_Sub8x16
, False
);
11646 goto decode_success
;
11649 /* 66 0F FA = PSUBD */
11650 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xFA) {
11651 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11652 "psubd", Iop_Sub32x4
, False
);
11653 goto decode_success
;
11656 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
11657 /* 0F FB = PSUBQ -- sub 64x1 */
11658 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xFB) {
11660 delta
= dis_MMXop_regmem_to_reg (
11661 sorb
, delta
+2, insn
[1], "psubq", False
);
11662 goto decode_success
;
11665 /* 66 0F FB = PSUBQ */
11666 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xFB) {
11667 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11668 "psubq", Iop_Sub64x2
, False
);
11669 goto decode_success
;
11672 /* 66 0F F9 = PSUBW */
11673 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xF9) {
11674 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11675 "psubw", Iop_Sub16x8
, False
);
11676 goto decode_success
;
11679 /* 66 0F E8 = PSUBSB */
11680 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE8) {
11681 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11682 "psubsb", Iop_QSub8Sx16
, False
);
11683 goto decode_success
;
11686 /* 66 0F E9 = PSUBSW */
11687 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xE9) {
11688 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11689 "psubsw", Iop_QSub16Sx8
, False
);
11690 goto decode_success
;
11693 /* 66 0F D8 = PSUBSB */
11694 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD8) {
11695 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11696 "psubusb", Iop_QSub8Ux16
, False
);
11697 goto decode_success
;
11700 /* 66 0F D9 = PSUBSW */
11701 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD9) {
11702 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11703 "psubusw", Iop_QSub16Ux8
, False
);
11704 goto decode_success
;
11707 /* 66 0F 68 = PUNPCKHBW */
11708 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x68) {
11709 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11711 Iop_InterleaveHI8x16
, True
);
11712 goto decode_success
;
11715 /* 66 0F 6A = PUNPCKHDQ */
11716 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x6A) {
11717 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11719 Iop_InterleaveHI32x4
, True
);
11720 goto decode_success
;
11723 /* 66 0F 6D = PUNPCKHQDQ */
11724 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x6D) {
11725 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11727 Iop_InterleaveHI64x2
, True
);
11728 goto decode_success
;
11731 /* 66 0F 69 = PUNPCKHWD */
11732 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x69) {
11733 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11735 Iop_InterleaveHI16x8
, True
);
11736 goto decode_success
;
11739 /* 66 0F 60 = PUNPCKLBW */
11740 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x60) {
11741 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11743 Iop_InterleaveLO8x16
, True
);
11744 goto decode_success
;
11747 /* 66 0F 62 = PUNPCKLDQ */
11748 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x62) {
11749 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11751 Iop_InterleaveLO32x4
, True
);
11752 goto decode_success
;
11755 /* 66 0F 6C = PUNPCKLQDQ */
11756 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x6C) {
11757 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11759 Iop_InterleaveLO64x2
, True
);
11760 goto decode_success
;
11763 /* 66 0F 61 = PUNPCKLWD */
11764 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0x61) {
11765 delta
= dis_SSEint_E_to_G( sorb
, delta
+2,
11767 Iop_InterleaveLO16x8
, True
);
11768 goto decode_success
;
11771 /* 66 0F EF = PXOR */
11772 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xEF) {
11773 delta
= dis_SSE_E_to_G_all( sorb
, delta
+2, "pxor", Iop_XorV128
);
11774 goto decode_success
;
11777 //-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
11778 //-- if (insn[0] == 0x0F && insn[1] == 0xAE
11779 //-- && (!epartIsReg(insn[2]))
11780 //-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
11781 //-- Bool store = gregOfRM(insn[2]) == 0;
11782 //-- vg_assert(sz == 4);
11783 //-- pair = disAMode ( cb, sorb, eip+2, dis_buf );
11784 //-- t1 = LOW24(pair);
11785 //-- eip += 2+HI8(pair);
11786 //-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
11787 //-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
11788 //-- Lit16, (UShort)insn[2],
11789 //-- TempReg, t1 );
11790 //-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf );
11791 //-- goto decode_success;
11794 /* 0F AE /7 = CLFLUSH -- flush cache line */
11795 if (sz
== 4 && insn
[0] == 0x0F && insn
[1] == 0xAE
11796 && !epartIsReg(insn
[2]) && gregOfRM(insn
[2]) == 7) {
11798 /* This is something of a hack. We need to know the size of the
11799 cache line containing addr. Since we don't (easily), assume
11800 256 on the basis that no real cache would have a line that
11801 big. It's safe to invalidate more stuff than we need, just
11803 UInt lineszB
= 256;
11805 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11808 /* Round addr down to the start of the containing block. */
11813 mkU32( ~(lineszB
-1) ))) );
11815 stmt( IRStmt_Put(OFFB_CMLEN
, mkU32(lineszB
) ) );
11817 jmp_lit(&dres
, Ijk_InvalICache
, (Addr32
)(guest_EIP_bbstart
+delta
));
11819 DIP("clflush %s\n", dis_buf
);
11820 goto decode_success
;
11823 /* ---------------------------------------------------- */
11824 /* --- end of the SSE2 decoder. --- */
11825 /* ---------------------------------------------------- */
11827 /* ---------------------------------------------------- */
11828 /* --- start of the SSE3 decoder. --- */
11829 /* ---------------------------------------------------- */
11831 /* Skip parts of the decoder which don't apply given the stated
11832 guest subarchitecture. */
11833 if (0 == (archinfo
->hwcaps
& VEX_HWCAPS_X86_SSE3
))
11834 goto after_sse_decoders
; /* no SSE3 capabilities */
11836 insn
= &guest_code
[delta
];
11838 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
11839 duplicating some lanes (2:2:0:0). */
11840 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
11841 duplicating some lanes (3:3:1:1). */
11842 if (sz
== 4 && insn
[0] == 0xF3 && insn
[1] == 0x0F
11843 && (insn
[2] == 0x12 || insn
[2] == 0x16)) {
11844 IRTemp s3
, s2
, s1
, s0
;
11845 IRTemp sV
= newTemp(Ity_V128
);
11846 Bool isH
= insn
[2] == 0x16;
11847 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11850 if (epartIsReg(modrm
)) {
11851 assign( sV
, getXMMReg( eregOfRM(modrm
)) );
11852 DIP("movs%cdup %s,%s\n", isH
? 'h' : 'l',
11853 nameXMMReg(eregOfRM(modrm
)),
11854 nameXMMReg(gregOfRM(modrm
)));
11857 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
11858 gen_SEGV_if_not_16_aligned( addr
);
11859 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11860 DIP("movs%cdup %s,%s\n", isH
? 'h' : 'l',
11862 nameXMMReg(gregOfRM(modrm
)));
11866 breakup128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
11867 putXMMReg( gregOfRM(modrm
),
11868 isH
? mk128from32s( s3
, s3
, s1
, s1
)
11869 : mk128from32s( s2
, s2
, s0
, s0
) );
11870 goto decode_success
;
11873 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
11874 duplicating some lanes (0:1:0:1). */
11875 if (sz
== 4 && insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0x12) {
11876 IRTemp sV
= newTemp(Ity_V128
);
11877 IRTemp d0
= newTemp(Ity_I64
);
11880 if (epartIsReg(modrm
)) {
11881 assign( sV
, getXMMReg( eregOfRM(modrm
)) );
11882 DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
11883 nameXMMReg(gregOfRM(modrm
)));
11885 assign ( d0
, unop(Iop_V128to64
, mkexpr(sV
)) );
11887 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
11888 assign( d0
, loadLE(Ity_I64
, mkexpr(addr
)) );
11889 DIP("movddup %s,%s\n", dis_buf
,
11890 nameXMMReg(gregOfRM(modrm
)));
11894 putXMMReg( gregOfRM(modrm
), binop(Iop_64HLtoV128
,mkexpr(d0
),mkexpr(d0
)) );
11895 goto decode_success
;
11898 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
11899 if (sz
== 4 && insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0xD0) {
11900 IRTemp a3
, a2
, a1
, a0
, s3
, s2
, s1
, s0
;
11901 IRTemp eV
= newTemp(Ity_V128
);
11902 IRTemp gV
= newTemp(Ity_V128
);
11903 IRTemp addV
= newTemp(Ity_V128
);
11904 IRTemp subV
= newTemp(Ity_V128
);
11905 IRTemp rm
= newTemp(Ity_I32
);
11906 a3
= a2
= a1
= a0
= s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11909 if (epartIsReg(modrm
)) {
11910 assign( eV
, getXMMReg( eregOfRM(modrm
)) );
11911 DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
11912 nameXMMReg(gregOfRM(modrm
)));
11915 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
11916 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11917 DIP("addsubps %s,%s\n", dis_buf
,
11918 nameXMMReg(gregOfRM(modrm
)));
11922 assign( gV
, getXMMReg(gregOfRM(modrm
)) );
11924 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11925 assign( addV
, triop(Iop_Add32Fx4
, mkexpr(rm
), mkexpr(gV
), mkexpr(eV
)) );
11926 assign( subV
, triop(Iop_Sub32Fx4
, mkexpr(rm
), mkexpr(gV
), mkexpr(eV
)) );
11928 breakup128to32s( addV
, &a3
, &a2
, &a1
, &a0
);
11929 breakup128to32s( subV
, &s3
, &s2
, &s1
, &s0
);
11931 putXMMReg( gregOfRM(modrm
), mk128from32s( a3
, s2
, a1
, s0
));
11932 goto decode_success
;
11935 /* 66 0F D0 = ADDSUBPD -- 64x4 +/- from E (mem or xmm) to G (xmm). */
11936 if (sz
== 2 && insn
[0] == 0x0F && insn
[1] == 0xD0) {
11937 IRTemp eV
= newTemp(Ity_V128
);
11938 IRTemp gV
= newTemp(Ity_V128
);
11939 IRTemp addV
= newTemp(Ity_V128
);
11940 IRTemp subV
= newTemp(Ity_V128
);
11941 IRTemp a1
= newTemp(Ity_I64
);
11942 IRTemp s0
= newTemp(Ity_I64
);
11943 IRTemp rm
= newTemp(Ity_I32
);
11946 if (epartIsReg(modrm
)) {
11947 assign( eV
, getXMMReg( eregOfRM(modrm
)) );
11948 DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
11949 nameXMMReg(gregOfRM(modrm
)));
11952 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
11953 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11954 DIP("addsubpd %s,%s\n", dis_buf
,
11955 nameXMMReg(gregOfRM(modrm
)));
11959 assign( gV
, getXMMReg(gregOfRM(modrm
)) );
11961 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11962 assign( addV
, triop(Iop_Add64Fx2
, mkexpr(rm
), mkexpr(gV
), mkexpr(eV
)) );
11963 assign( subV
, triop(Iop_Sub64Fx2
, mkexpr(rm
), mkexpr(gV
), mkexpr(eV
)) );
11965 assign( a1
, unop(Iop_V128HIto64
, mkexpr(addV
) ));
11966 assign( s0
, unop(Iop_V128to64
, mkexpr(subV
) ));
11968 putXMMReg( gregOfRM(modrm
),
11969 binop(Iop_64HLtoV128
, mkexpr(a1
), mkexpr(s0
)) );
11970 goto decode_success
;
11973 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
11974 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
11975 if (sz
== 4 && insn
[0] == 0xF2 && insn
[1] == 0x0F
11976 && (insn
[2] == 0x7C || insn
[2] == 0x7D)) {
11977 IRTemp e3
, e2
, e1
, e0
, g3
, g2
, g1
, g0
;
11978 IRTemp eV
= newTemp(Ity_V128
);
11979 IRTemp gV
= newTemp(Ity_V128
);
11980 IRTemp leftV
= newTemp(Ity_V128
);
11981 IRTemp rightV
= newTemp(Ity_V128
);
11982 IRTemp rm
= newTemp(Ity_I32
);
11983 Bool isAdd
= insn
[2] == 0x7C;
11984 const HChar
* str
= isAdd
? "add" : "sub";
11985 e3
= e2
= e1
= e0
= g3
= g2
= g1
= g0
= IRTemp_INVALID
;
11988 if (epartIsReg(modrm
)) {
11989 assign( eV
, getXMMReg( eregOfRM(modrm
)) );
11990 DIP("h%sps %s,%s\n", str
, nameXMMReg(eregOfRM(modrm
)),
11991 nameXMMReg(gregOfRM(modrm
)));
11994 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
11995 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11996 DIP("h%sps %s,%s\n", str
, dis_buf
,
11997 nameXMMReg(gregOfRM(modrm
)));
12001 assign( gV
, getXMMReg(gregOfRM(modrm
)) );
12003 breakup128to32s( eV
, &e3
, &e2
, &e1
, &e0
);
12004 breakup128to32s( gV
, &g3
, &g2
, &g1
, &g0
);
12006 assign( leftV
, mk128from32s( e2
, e0
, g2
, g0
) );
12007 assign( rightV
, mk128from32s( e3
, e1
, g3
, g1
) );
12009 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
12010 putXMMReg( gregOfRM(modrm
),
12011 triop(isAdd
? Iop_Add32Fx4
: Iop_Sub32Fx4
,
12012 mkexpr(rm
), mkexpr(leftV
), mkexpr(rightV
) ) );
12013 goto decode_success
;
12016 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
12017 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
12018 if (sz
== 2 && insn
[0] == 0x0F && (insn
[1] == 0x7C || insn
[1] == 0x7D)) {
12019 IRTemp e1
= newTemp(Ity_I64
);
12020 IRTemp e0
= newTemp(Ity_I64
);
12021 IRTemp g1
= newTemp(Ity_I64
);
12022 IRTemp g0
= newTemp(Ity_I64
);
12023 IRTemp eV
= newTemp(Ity_V128
);
12024 IRTemp gV
= newTemp(Ity_V128
);
12025 IRTemp leftV
= newTemp(Ity_V128
);
12026 IRTemp rightV
= newTemp(Ity_V128
);
12027 IRTemp rm
= newTemp(Ity_I32
);
12028 Bool isAdd
= insn
[1] == 0x7C;
12029 const HChar
* str
= isAdd
? "add" : "sub";
12032 if (epartIsReg(modrm
)) {
12033 assign( eV
, getXMMReg( eregOfRM(modrm
)) );
12034 DIP("h%spd %s,%s\n", str
, nameXMMReg(eregOfRM(modrm
)),
12035 nameXMMReg(gregOfRM(modrm
)));
12038 addr
= disAMode ( &alen
, sorb
, delta
+2, dis_buf
);
12039 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12040 DIP("h%spd %s,%s\n", str
, dis_buf
,
12041 nameXMMReg(gregOfRM(modrm
)));
12045 assign( gV
, getXMMReg(gregOfRM(modrm
)) );
12047 assign( e1
, unop(Iop_V128HIto64
, mkexpr(eV
) ));
12048 assign( e0
, unop(Iop_V128to64
, mkexpr(eV
) ));
12049 assign( g1
, unop(Iop_V128HIto64
, mkexpr(gV
) ));
12050 assign( g0
, unop(Iop_V128to64
, mkexpr(gV
) ));
12052 assign( leftV
, binop(Iop_64HLtoV128
, mkexpr(e0
),mkexpr(g0
)) );
12053 assign( rightV
, binop(Iop_64HLtoV128
, mkexpr(e1
),mkexpr(g1
)) );
12055 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
12056 putXMMReg( gregOfRM(modrm
),
12057 triop(isAdd
? Iop_Add64Fx2
: Iop_Sub64Fx2
,
12058 mkexpr(rm
), mkexpr(leftV
), mkexpr(rightV
) ) );
12059 goto decode_success
;
12062 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
12063 if (sz
== 4 && insn
[0] == 0xF2 && insn
[1] == 0x0F && insn
[2] == 0xF0) {
12064 modrm
= getIByte(delta
+3);
12065 if (epartIsReg(modrm
)) {
12066 goto decode_failure
;
12068 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12069 putXMMReg( gregOfRM(modrm
),
12070 loadLE(Ity_V128
, mkexpr(addr
)) );
12071 DIP("lddqu %s,%s\n", dis_buf
,
12072 nameXMMReg(gregOfRM(modrm
)));
12075 goto decode_success
;
12078 /* ---------------------------------------------------- */
12079 /* --- end of the SSE3 decoder. --- */
12080 /* ---------------------------------------------------- */
12082 /* ---------------------------------------------------- */
12083 /* --- start of the SSSE3 decoder. --- */
12084 /* ---------------------------------------------------- */
12086 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
12087 Unsigned Bytes (MMX) */
12089 && insn
[0] == 0x0F && insn
[1] == 0x38 && insn
[2] == 0x04) {
12090 IRTemp sV
= newTemp(Ity_I64
);
12091 IRTemp dV
= newTemp(Ity_I64
);
12092 IRTemp sVoddsSX
= newTemp(Ity_I64
);
12093 IRTemp sVevensSX
= newTemp(Ity_I64
);
12094 IRTemp dVoddsZX
= newTemp(Ity_I64
);
12095 IRTemp dVevensZX
= newTemp(Ity_I64
);
12099 assign( dV
, getMMXReg(gregOfRM(modrm
)) );
12101 if (epartIsReg(modrm
)) {
12102 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
12104 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
12105 nameMMXReg(gregOfRM(modrm
)));
12107 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12108 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
12110 DIP("pmaddubsw %s,%s\n", dis_buf
,
12111 nameMMXReg(gregOfRM(modrm
)));
12114 /* compute dV unsigned x sV signed */
12116 binop(Iop_SarN16x4
, mkexpr(sV
), mkU8(8)) );
12118 binop(Iop_SarN16x4
,
12119 binop(Iop_ShlN16x4
, mkexpr(sV
), mkU8(8)),
12122 binop(Iop_ShrN16x4
, mkexpr(dV
), mkU8(8)) );
12124 binop(Iop_ShrN16x4
,
12125 binop(Iop_ShlN16x4
, mkexpr(dV
), mkU8(8)),
12130 binop(Iop_QAdd16Sx4
,
12131 binop(Iop_Mul16x4
, mkexpr(sVoddsSX
), mkexpr(dVoddsZX
)),
12132 binop(Iop_Mul16x4
, mkexpr(sVevensSX
), mkexpr(dVevensZX
))
12135 goto decode_success
;
12138 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
12139 Unsigned Bytes (XMM) */
12141 && insn
[0] == 0x0F && insn
[1] == 0x38 && insn
[2] == 0x04) {
12142 IRTemp sV
= newTemp(Ity_V128
);
12143 IRTemp dV
= newTemp(Ity_V128
);
12144 IRTemp sVoddsSX
= newTemp(Ity_V128
);
12145 IRTemp sVevensSX
= newTemp(Ity_V128
);
12146 IRTemp dVoddsZX
= newTemp(Ity_V128
);
12147 IRTemp dVevensZX
= newTemp(Ity_V128
);
12150 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
12152 if (epartIsReg(modrm
)) {
12153 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
12155 DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
12156 nameXMMReg(gregOfRM(modrm
)));
12158 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12159 gen_SEGV_if_not_16_aligned( addr
);
12160 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12162 DIP("pmaddubsw %s,%s\n", dis_buf
,
12163 nameXMMReg(gregOfRM(modrm
)));
12166 /* compute dV unsigned x sV signed */
12168 binop(Iop_SarN16x8
, mkexpr(sV
), mkU8(8)) );
12170 binop(Iop_SarN16x8
,
12171 binop(Iop_ShlN16x8
, mkexpr(sV
), mkU8(8)),
12174 binop(Iop_ShrN16x8
, mkexpr(dV
), mkU8(8)) );
12176 binop(Iop_ShrN16x8
,
12177 binop(Iop_ShlN16x8
, mkexpr(dV
), mkU8(8)),
12182 binop(Iop_QAdd16Sx8
,
12183 binop(Iop_Mul16x8
, mkexpr(sVoddsSX
), mkexpr(dVoddsZX
)),
12184 binop(Iop_Mul16x8
, mkexpr(sVevensSX
), mkexpr(dVevensZX
))
12187 goto decode_success
;
12190 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
12191 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
12192 mmx) and G to G (mmx). */
12193 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
12194 mmx) and G to G (mmx). */
12195 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
12197 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
12199 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
12201 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
12205 && insn
[0] == 0x0F && insn
[1] == 0x38
12206 && (insn
[2] == 0x03 || insn
[2] == 0x07 || insn
[2] == 0x01
12207 || insn
[2] == 0x05 || insn
[2] == 0x02 || insn
[2] == 0x06)) {
12208 const HChar
* str
= "???";
12209 IROp opV64
= Iop_INVALID
;
12210 IROp opCatO
= Iop_CatOddLanes16x4
;
12211 IROp opCatE
= Iop_CatEvenLanes16x4
;
12212 IRTemp sV
= newTemp(Ity_I64
);
12213 IRTemp dV
= newTemp(Ity_I64
);
12218 case 0x03: opV64
= Iop_QAdd16Sx4
; str
= "addsw"; break;
12219 case 0x07: opV64
= Iop_QSub16Sx4
; str
= "subsw"; break;
12220 case 0x01: opV64
= Iop_Add16x4
; str
= "addw"; break;
12221 case 0x05: opV64
= Iop_Sub16x4
; str
= "subw"; break;
12222 case 0x02: opV64
= Iop_Add32x2
; str
= "addd"; break;
12223 case 0x06: opV64
= Iop_Sub32x2
; str
= "subd"; break;
12224 default: vassert(0);
12226 if (insn
[2] == 0x02 || insn
[2] == 0x06) {
12227 opCatO
= Iop_InterleaveHI32x2
;
12228 opCatE
= Iop_InterleaveLO32x2
;
12232 assign( dV
, getMMXReg(gregOfRM(modrm
)) );
12234 if (epartIsReg(modrm
)) {
12235 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
12237 DIP("ph%s %s,%s\n", str
, nameMMXReg(eregOfRM(modrm
)),
12238 nameMMXReg(gregOfRM(modrm
)));
12240 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12241 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
12243 DIP("ph%s %s,%s\n", str
, dis_buf
,
12244 nameMMXReg(gregOfRM(modrm
)));
12250 binop(opCatE
,mkexpr(sV
),mkexpr(dV
)),
12251 binop(opCatO
,mkexpr(sV
),mkexpr(dV
))
12254 goto decode_success
;
12257 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
12258 xmm) and G to G (xmm). */
12259 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
12260 xmm) and G to G (xmm). */
12261 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
12263 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
12265 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
12267 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
12271 && insn
[0] == 0x0F && insn
[1] == 0x38
12272 && (insn
[2] == 0x03 || insn
[2] == 0x07 || insn
[2] == 0x01
12273 || insn
[2] == 0x05 || insn
[2] == 0x02 || insn
[2] == 0x06)) {
12274 const HChar
* str
= "???";
12275 IROp opV64
= Iop_INVALID
;
12276 IROp opCatO
= Iop_CatOddLanes16x4
;
12277 IROp opCatE
= Iop_CatEvenLanes16x4
;
12278 IRTemp sV
= newTemp(Ity_V128
);
12279 IRTemp dV
= newTemp(Ity_V128
);
12280 IRTemp sHi
= newTemp(Ity_I64
);
12281 IRTemp sLo
= newTemp(Ity_I64
);
12282 IRTemp dHi
= newTemp(Ity_I64
);
12283 IRTemp dLo
= newTemp(Ity_I64
);
12288 case 0x03: opV64
= Iop_QAdd16Sx4
; str
= "addsw"; break;
12289 case 0x07: opV64
= Iop_QSub16Sx4
; str
= "subsw"; break;
12290 case 0x01: opV64
= Iop_Add16x4
; str
= "addw"; break;
12291 case 0x05: opV64
= Iop_Sub16x4
; str
= "subw"; break;
12292 case 0x02: opV64
= Iop_Add32x2
; str
= "addd"; break;
12293 case 0x06: opV64
= Iop_Sub32x2
; str
= "subd"; break;
12294 default: vassert(0);
12296 if (insn
[2] == 0x02 || insn
[2] == 0x06) {
12297 opCatO
= Iop_InterleaveHI32x2
;
12298 opCatE
= Iop_InterleaveLO32x2
;
12301 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
12303 if (epartIsReg(modrm
)) {
12304 assign( sV
, getXMMReg( eregOfRM(modrm
)) );
12305 DIP("ph%s %s,%s\n", str
, nameXMMReg(eregOfRM(modrm
)),
12306 nameXMMReg(gregOfRM(modrm
)));
12309 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12310 gen_SEGV_if_not_16_aligned( addr
);
12311 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12312 DIP("ph%s %s,%s\n", str
, dis_buf
,
12313 nameXMMReg(gregOfRM(modrm
)));
12317 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
12318 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
12319 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
12320 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
12322 /* This isn't a particularly efficient way to compute the
12323 result, but at least it avoids a proliferation of IROps,
12324 hence avoids complication all the backends. */
12327 binop(Iop_64HLtoV128
,
12329 binop(opCatE
,mkexpr(sHi
),mkexpr(sLo
)),
12330 binop(opCatO
,mkexpr(sHi
),mkexpr(sLo
))
12333 binop(opCatE
,mkexpr(dHi
),mkexpr(dLo
)),
12334 binop(opCatO
,mkexpr(dHi
),mkexpr(dLo
))
12338 goto decode_success
;
12341 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
12344 && insn
[0] == 0x0F && insn
[1] == 0x38 && insn
[2] == 0x0B) {
12345 IRTemp sV
= newTemp(Ity_I64
);
12346 IRTemp dV
= newTemp(Ity_I64
);
12350 assign( dV
, getMMXReg(gregOfRM(modrm
)) );
12352 if (epartIsReg(modrm
)) {
12353 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
12355 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
12356 nameMMXReg(gregOfRM(modrm
)));
12358 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12359 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
12361 DIP("pmulhrsw %s,%s\n", dis_buf
,
12362 nameMMXReg(gregOfRM(modrm
)));
12367 dis_PMULHRSW_helper( mkexpr(sV
), mkexpr(dV
) )
12369 goto decode_success
;
12372 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
12375 && insn
[0] == 0x0F && insn
[1] == 0x38 && insn
[2] == 0x0B) {
12376 IRTemp sV
= newTemp(Ity_V128
);
12377 IRTemp dV
= newTemp(Ity_V128
);
12378 IRTemp sHi
= newTemp(Ity_I64
);
12379 IRTemp sLo
= newTemp(Ity_I64
);
12380 IRTemp dHi
= newTemp(Ity_I64
);
12381 IRTemp dLo
= newTemp(Ity_I64
);
12384 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
12386 if (epartIsReg(modrm
)) {
12387 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
12389 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
12390 nameXMMReg(gregOfRM(modrm
)));
12392 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12393 gen_SEGV_if_not_16_aligned( addr
);
12394 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12396 DIP("pmulhrsw %s,%s\n", dis_buf
,
12397 nameXMMReg(gregOfRM(modrm
)));
12400 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
12401 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
12402 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
12403 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
12407 binop(Iop_64HLtoV128
,
12408 dis_PMULHRSW_helper( mkexpr(sHi
), mkexpr(dHi
) ),
12409 dis_PMULHRSW_helper( mkexpr(sLo
), mkexpr(dLo
) )
12412 goto decode_success
;
12415 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
12416 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
12417 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
12419 && insn
[0] == 0x0F && insn
[1] == 0x38
12420 && (insn
[2] == 0x08 || insn
[2] == 0x09 || insn
[2] == 0x0A)) {
12421 IRTemp sV
= newTemp(Ity_I64
);
12422 IRTemp dV
= newTemp(Ity_I64
);
12423 const HChar
* str
= "???";
12427 case 0x08: laneszB
= 1; str
= "b"; break;
12428 case 0x09: laneszB
= 2; str
= "w"; break;
12429 case 0x0A: laneszB
= 4; str
= "d"; break;
12430 default: vassert(0);
12435 assign( dV
, getMMXReg(gregOfRM(modrm
)) );
12437 if (epartIsReg(modrm
)) {
12438 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
12440 DIP("psign%s %s,%s\n", str
, nameMMXReg(eregOfRM(modrm
)),
12441 nameMMXReg(gregOfRM(modrm
)));
12443 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12444 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
12446 DIP("psign%s %s,%s\n", str
, dis_buf
,
12447 nameMMXReg(gregOfRM(modrm
)));
12452 dis_PSIGN_helper( mkexpr(sV
), mkexpr(dV
), laneszB
)
12454 goto decode_success
;
12457 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
12458 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
12459 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
12461 && insn
[0] == 0x0F && insn
[1] == 0x38
12462 && (insn
[2] == 0x08 || insn
[2] == 0x09 || insn
[2] == 0x0A)) {
12463 IRTemp sV
= newTemp(Ity_V128
);
12464 IRTemp dV
= newTemp(Ity_V128
);
12465 IRTemp sHi
= newTemp(Ity_I64
);
12466 IRTemp sLo
= newTemp(Ity_I64
);
12467 IRTemp dHi
= newTemp(Ity_I64
);
12468 IRTemp dLo
= newTemp(Ity_I64
);
12469 const HChar
* str
= "???";
12473 case 0x08: laneszB
= 1; str
= "b"; break;
12474 case 0x09: laneszB
= 2; str
= "w"; break;
12475 case 0x0A: laneszB
= 4; str
= "d"; break;
12476 default: vassert(0);
12480 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
12482 if (epartIsReg(modrm
)) {
12483 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
12485 DIP("psign%s %s,%s\n", str
, nameXMMReg(eregOfRM(modrm
)),
12486 nameXMMReg(gregOfRM(modrm
)));
12488 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12489 gen_SEGV_if_not_16_aligned( addr
);
12490 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12492 DIP("psign%s %s,%s\n", str
, dis_buf
,
12493 nameXMMReg(gregOfRM(modrm
)));
12496 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
12497 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
12498 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
12499 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
12503 binop(Iop_64HLtoV128
,
12504 dis_PSIGN_helper( mkexpr(sHi
), mkexpr(dHi
), laneszB
),
12505 dis_PSIGN_helper( mkexpr(sLo
), mkexpr(dLo
), laneszB
)
12508 goto decode_success
;
12511 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
12512 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
12513 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
12515 && insn
[0] == 0x0F && insn
[1] == 0x38
12516 && (insn
[2] == 0x1C || insn
[2] == 0x1D || insn
[2] == 0x1E)) {
12517 IRTemp sV
= newTemp(Ity_I64
);
12518 const HChar
* str
= "???";
12522 case 0x1C: laneszB
= 1; str
= "b"; break;
12523 case 0x1D: laneszB
= 2; str
= "w"; break;
12524 case 0x1E: laneszB
= 4; str
= "d"; break;
12525 default: vassert(0);
12531 if (epartIsReg(modrm
)) {
12532 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
12534 DIP("pabs%s %s,%s\n", str
, nameMMXReg(eregOfRM(modrm
)),
12535 nameMMXReg(gregOfRM(modrm
)));
12537 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12538 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
12540 DIP("pabs%s %s,%s\n", str
, dis_buf
,
12541 nameMMXReg(gregOfRM(modrm
)));
12546 dis_PABS_helper( mkexpr(sV
), laneszB
)
12548 goto decode_success
;
12551 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
12552 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
12553 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
12555 && insn
[0] == 0x0F && insn
[1] == 0x38
12556 && (insn
[2] == 0x1C || insn
[2] == 0x1D || insn
[2] == 0x1E)) {
12557 IRTemp sV
= newTemp(Ity_V128
);
12558 IRTemp sHi
= newTemp(Ity_I64
);
12559 IRTemp sLo
= newTemp(Ity_I64
);
12560 const HChar
* str
= "???";
12564 case 0x1C: laneszB
= 1; str
= "b"; break;
12565 case 0x1D: laneszB
= 2; str
= "w"; break;
12566 case 0x1E: laneszB
= 4; str
= "d"; break;
12567 default: vassert(0);
12572 if (epartIsReg(modrm
)) {
12573 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
12575 DIP("pabs%s %s,%s\n", str
, nameXMMReg(eregOfRM(modrm
)),
12576 nameXMMReg(gregOfRM(modrm
)));
12578 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12579 gen_SEGV_if_not_16_aligned( addr
);
12580 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12582 DIP("pabs%s %s,%s\n", str
, dis_buf
,
12583 nameXMMReg(gregOfRM(modrm
)));
12586 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
12587 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
12591 binop(Iop_64HLtoV128
,
12592 dis_PABS_helper( mkexpr(sHi
), laneszB
),
12593 dis_PABS_helper( mkexpr(sLo
), laneszB
)
12596 goto decode_success
;
12599 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
12601 && insn
[0] == 0x0F && insn
[1] == 0x3A && insn
[2] == 0x0F) {
12602 IRTemp sV
= newTemp(Ity_I64
);
12603 IRTemp dV
= newTemp(Ity_I64
);
12604 IRTemp res
= newTemp(Ity_I64
);
12608 assign( dV
, getMMXReg(gregOfRM(modrm
)) );
12610 if (epartIsReg(modrm
)) {
12611 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
12612 d32
= (UInt
)insn
[3+1];
12614 DIP("palignr $%u,%s,%s\n", d32
,
12615 nameMMXReg(eregOfRM(modrm
)),
12616 nameMMXReg(gregOfRM(modrm
)));
12618 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12619 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
12620 d32
= (UInt
)insn
[3+alen
];
12622 DIP("palignr $%u%s,%s\n", d32
,
12624 nameMMXReg(gregOfRM(modrm
)));
12628 assign( res
, mkexpr(sV
) );
12630 else if (d32
>= 1 && d32
<= 7) {
12633 binop(Iop_Shr64
, mkexpr(sV
), mkU8(8*d32
)),
12634 binop(Iop_Shl64
, mkexpr(dV
), mkU8(8*(8-d32
))
12637 else if (d32
== 8) {
12638 assign( res
, mkexpr(dV
) );
12640 else if (d32
>= 9 && d32
<= 15) {
12641 assign( res
, binop(Iop_Shr64
, mkexpr(dV
), mkU8(8*(d32
-8))) );
12643 else if (d32
>= 16 && d32
<= 255) {
12644 assign( res
, mkU64(0) );
12649 putMMXReg( gregOfRM(modrm
), mkexpr(res
) );
12650 goto decode_success
;
12653 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
12655 && insn
[0] == 0x0F && insn
[1] == 0x3A && insn
[2] == 0x0F) {
12656 IRTemp sV
= newTemp(Ity_V128
);
12657 IRTemp dV
= newTemp(Ity_V128
);
12658 IRTemp sHi
= newTemp(Ity_I64
);
12659 IRTemp sLo
= newTemp(Ity_I64
);
12660 IRTemp dHi
= newTemp(Ity_I64
);
12661 IRTemp dLo
= newTemp(Ity_I64
);
12662 IRTemp rHi
= newTemp(Ity_I64
);
12663 IRTemp rLo
= newTemp(Ity_I64
);
12666 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
12668 if (epartIsReg(modrm
)) {
12669 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
12670 d32
= (UInt
)insn
[3+1];
12672 DIP("palignr $%u,%s,%s\n", d32
,
12673 nameXMMReg(eregOfRM(modrm
)),
12674 nameXMMReg(gregOfRM(modrm
)));
12676 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12677 gen_SEGV_if_not_16_aligned( addr
);
12678 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12679 d32
= (UInt
)insn
[3+alen
];
12681 DIP("palignr $%u,%s,%s\n", d32
,
12683 nameXMMReg(gregOfRM(modrm
)));
12686 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
12687 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
12688 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
12689 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
12692 assign( rHi
, mkexpr(sHi
) );
12693 assign( rLo
, mkexpr(sLo
) );
12695 else if (d32
>= 1 && d32
<= 7) {
12696 assign( rHi
, dis_PALIGNR_XMM_helper(dLo
, sHi
, d32
) );
12697 assign( rLo
, dis_PALIGNR_XMM_helper(sHi
, sLo
, d32
) );
12699 else if (d32
== 8) {
12700 assign( rHi
, mkexpr(dLo
) );
12701 assign( rLo
, mkexpr(sHi
) );
12703 else if (d32
>= 9 && d32
<= 15) {
12704 assign( rHi
, dis_PALIGNR_XMM_helper(dHi
, dLo
, d32
-8) );
12705 assign( rLo
, dis_PALIGNR_XMM_helper(dLo
, sHi
, d32
-8) );
12707 else if (d32
== 16) {
12708 assign( rHi
, mkexpr(dHi
) );
12709 assign( rLo
, mkexpr(dLo
) );
12711 else if (d32
>= 17 && d32
<= 23) {
12712 assign( rHi
, binop(Iop_Shr64
, mkexpr(dHi
), mkU8(8*(d32
-16))) );
12713 assign( rLo
, dis_PALIGNR_XMM_helper(dHi
, dLo
, d32
-16) );
12715 else if (d32
== 24) {
12716 assign( rHi
, mkU64(0) );
12717 assign( rLo
, mkexpr(dHi
) );
12719 else if (d32
>= 25 && d32
<= 31) {
12720 assign( rHi
, mkU64(0) );
12721 assign( rLo
, binop(Iop_Shr64
, mkexpr(dHi
), mkU8(8*(d32
-24))) );
12723 else if (d32
>= 32 && d32
<= 255) {
12724 assign( rHi
, mkU64(0) );
12725 assign( rLo
, mkU64(0) );
12732 binop(Iop_64HLtoV128
, mkexpr(rHi
), mkexpr(rLo
))
12734 goto decode_success
;
12737 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
12739 && insn
[0] == 0x0F && insn
[1] == 0x38 && insn
[2] == 0x00) {
12740 IRTemp sV
= newTemp(Ity_I64
);
12741 IRTemp dV
= newTemp(Ity_I64
);
12745 assign( dV
, getMMXReg(gregOfRM(modrm
)) );
12747 if (epartIsReg(modrm
)) {
12748 assign( sV
, getMMXReg(eregOfRM(modrm
)) );
12750 DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm
)),
12751 nameMMXReg(gregOfRM(modrm
)));
12753 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12754 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
12756 DIP("pshufb %s,%s\n", dis_buf
,
12757 nameMMXReg(gregOfRM(modrm
)));
12764 /* permute the lanes */
12768 binop(Iop_And64
, mkexpr(sV
), mkU64(0x0707070707070707ULL
))
12770 /* mask off lanes which have (index & 0x80) == 0x80 */
12771 unop(Iop_Not64
, binop(Iop_SarN8x8
, mkexpr(sV
), mkU8(7)))
12774 goto decode_success
;
12777 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
12779 && insn
[0] == 0x0F && insn
[1] == 0x38 && insn
[2] == 0x00) {
12780 IRTemp sV
= newTemp(Ity_V128
);
12781 IRTemp dV
= newTemp(Ity_V128
);
12782 IRTemp sHi
= newTemp(Ity_I64
);
12783 IRTemp sLo
= newTemp(Ity_I64
);
12784 IRTemp dHi
= newTemp(Ity_I64
);
12785 IRTemp dLo
= newTemp(Ity_I64
);
12786 IRTemp rHi
= newTemp(Ity_I64
);
12787 IRTemp rLo
= newTemp(Ity_I64
);
12788 IRTemp sevens
= newTemp(Ity_I64
);
12789 IRTemp mask0x80hi
= newTemp(Ity_I64
);
12790 IRTemp mask0x80lo
= newTemp(Ity_I64
);
12791 IRTemp maskBit3hi
= newTemp(Ity_I64
);
12792 IRTemp maskBit3lo
= newTemp(Ity_I64
);
12793 IRTemp sAnd7hi
= newTemp(Ity_I64
);
12794 IRTemp sAnd7lo
= newTemp(Ity_I64
);
12795 IRTemp permdHi
= newTemp(Ity_I64
);
12796 IRTemp permdLo
= newTemp(Ity_I64
);
12799 assign( dV
, getXMMReg(gregOfRM(modrm
)) );
12801 if (epartIsReg(modrm
)) {
12802 assign( sV
, getXMMReg(eregOfRM(modrm
)) );
12804 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm
)),
12805 nameXMMReg(gregOfRM(modrm
)));
12807 addr
= disAMode ( &alen
, sorb
, delta
+3, dis_buf
);
12808 gen_SEGV_if_not_16_aligned( addr
);
12809 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12811 DIP("pshufb %s,%s\n", dis_buf
,
12812 nameXMMReg(gregOfRM(modrm
)));
12815 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
12816 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
12817 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
12818 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
12820 assign( sevens
, mkU64(0x0707070707070707ULL
) );
12823 mask0x80hi = Not(SarN8x8(sHi,7))
12824 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
12825 sAnd7hi = And(sHi,sevens)
12826 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
12827 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
12828 rHi = And(permdHi,mask0x80hi)
12832 unop(Iop_Not64
, binop(Iop_SarN8x8
,mkexpr(sHi
),mkU8(7))));
12837 binop(Iop_ShlN8x8
,mkexpr(sHi
),mkU8(4)),
12840 assign(sAnd7hi
, binop(Iop_And64
,mkexpr(sHi
),mkexpr(sevens
)));
12847 binop(Iop_Perm8x8
,mkexpr(dHi
),mkexpr(sAnd7hi
)),
12848 mkexpr(maskBit3hi
)),
12850 binop(Iop_Perm8x8
,mkexpr(dLo
),mkexpr(sAnd7hi
)),
12851 unop(Iop_Not64
,mkexpr(maskBit3hi
))) ));
12853 assign(rHi
, binop(Iop_And64
,mkexpr(permdHi
),mkexpr(mask0x80hi
)) );
12855 /* And the same for the lower half of the result. What fun. */
12859 unop(Iop_Not64
, binop(Iop_SarN8x8
,mkexpr(sLo
),mkU8(7))));
12864 binop(Iop_ShlN8x8
,mkexpr(sLo
),mkU8(4)),
12867 assign(sAnd7lo
, binop(Iop_And64
,mkexpr(sLo
),mkexpr(sevens
)));
12874 binop(Iop_Perm8x8
,mkexpr(dHi
),mkexpr(sAnd7lo
)),
12875 mkexpr(maskBit3lo
)),
12877 binop(Iop_Perm8x8
,mkexpr(dLo
),mkexpr(sAnd7lo
)),
12878 unop(Iop_Not64
,mkexpr(maskBit3lo
))) ));
12880 assign(rLo
, binop(Iop_And64
,mkexpr(permdLo
),mkexpr(mask0x80lo
)) );
12884 binop(Iop_64HLtoV128
, mkexpr(rHi
), mkexpr(rLo
))
12886 goto decode_success
;
12889 /* 0F 38 F0 = MOVBE m16/32(E), r16/32(G) */
12890 /* 0F 38 F1 = MOVBE r16/32(G), m16/32(E) */
12891 if ((sz
== 2 || sz
== 4)
12892 && insn
[0] == 0x0F && insn
[1] == 0x38
12893 && (insn
[2] == 0xF0 || insn
[2] == 0xF1)
12894 && !epartIsReg(insn
[3])) {
12897 addr
= disAMode(&alen
, sorb
, delta
+ 3, dis_buf
);
12900 IRTemp src
= newTemp(ty
);
12902 if (insn
[2] == 0xF0) { /* LOAD */
12903 assign(src
, loadLE(ty
, mkexpr(addr
)));
12904 IRTemp dst
= math_BSWAP(src
, ty
);
12905 putIReg(sz
, gregOfRM(modrm
), mkexpr(dst
));
12906 DIP("movbe %s,%s\n", dis_buf
, nameIReg(sz
, gregOfRM(modrm
)));
12907 } else { /* STORE */
12908 assign(src
, getIReg(sz
, gregOfRM(modrm
)));
12909 IRTemp dst
= math_BSWAP(src
, ty
);
12910 storeLE(mkexpr(addr
), mkexpr(dst
));
12911 DIP("movbe %s,%s\n", nameIReg(sz
, gregOfRM(modrm
)), dis_buf
);
12913 goto decode_success
;
12916 /* ---------------------------------------------------- */
12917 /* --- end of the SSSE3 decoder. --- */
12918 /* ---------------------------------------------------- */
12920 /* ---------------------------------------------------- */
12921 /* --- start of the SSE4 decoder --- */
12922 /* ---------------------------------------------------- */
12924 /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
12925 (Partial implementation only -- only deal with cases where
12926 the rounding mode is specified directly by the immediate byte.)
12927 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
12928 (Limitations ditto)
12931 && insn
[0] == 0x0F && insn
[1] == 0x3A
12932 && (insn
[2] == 0x0B || insn
[2] == 0x0A)) {
12934 Bool isD
= insn
[2] == 0x0B;
12935 IRTemp src
= newTemp(isD
? Ity_F64
: Ity_F32
);
12936 IRTemp res
= newTemp(isD
? Ity_F64
: Ity_F32
);
12941 if (epartIsReg(modrm
)) {
12943 isD
? getXMMRegLane64F( eregOfRM(modrm
), 0 )
12944 : getXMMRegLane32F( eregOfRM(modrm
), 0 ) );
12946 if (imm
& ~3) goto decode_failure
;
12948 DIP( "rounds%c $%d,%s,%s\n",
12950 imm
, nameXMMReg( eregOfRM(modrm
) ),
12951 nameXMMReg( gregOfRM(modrm
) ) );
12953 addr
= disAMode( &alen
, sorb
, delta
+3, dis_buf
);
12954 assign( src
, loadLE( isD
? Ity_F64
: Ity_F32
, mkexpr(addr
) ));
12955 imm
= insn
[3+alen
];
12956 if (imm
& ~3) goto decode_failure
;
12958 DIP( "roundsd $%d,%s,%s\n",
12959 imm
, dis_buf
, nameXMMReg( gregOfRM(modrm
) ) );
12962 /* (imm & 3) contains an Intel-encoded rounding mode. Because
12963 that encoding is the same as the encoding for IRRoundingMode,
12964 we can use that value directly in the IR as a rounding
12966 assign(res
, binop(isD
? Iop_RoundF64toInt
: Iop_RoundF32toInt
,
12967 mkU32(imm
& 3), mkexpr(src
)) );
12970 putXMMRegLane64F( gregOfRM(modrm
), 0, mkexpr(res
) );
12972 putXMMRegLane32F( gregOfRM(modrm
), 0, mkexpr(res
) );
12974 goto decode_success
;
12977 /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension,
12978 which we can only decode if we're sure this is an AMD cpu that
12979 supports LZCNT, since otherwise it's BSR, which behaves
12981 if (insn
[0] == 0xF3 && insn
[1] == 0x0F && insn
[2] == 0xBD
12982 && 0 != (archinfo
->hwcaps
& VEX_HWCAPS_X86_LZCNT
)) {
12983 vassert(sz
== 2 || sz
== 4);
12984 /*IRType*/ ty
= szToITy(sz
);
12985 IRTemp src
= newTemp(ty
);
12987 if (epartIsReg(modrm
)) {
12988 assign(src
, getIReg(sz
, eregOfRM(modrm
)));
12990 DIP("lzcnt%c %s, %s\n", nameISize(sz
),
12991 nameIReg(sz
, eregOfRM(modrm
)),
12992 nameIReg(sz
, gregOfRM(modrm
)));
12994 addr
= disAMode( &alen
, sorb
, delta
+3, dis_buf
);
12995 assign(src
, loadLE(ty
, mkexpr(addr
)));
12997 DIP("lzcnt%c %s, %s\n", nameISize(sz
), dis_buf
,
12998 nameIReg(sz
, gregOfRM(modrm
)));
13001 IRTemp res
= gen_LZCNT(ty
, src
);
13002 putIReg(sz
, gregOfRM(modrm
), mkexpr(res
));
13004 // Update flags. This is pretty lame .. perhaps can do better
13005 // if this turns out to be performance critical.
13006 // O S A P are cleared. Z is set if RESULT == 0.
13007 // C is set if SRC is zero.
13008 IRTemp src32
= newTemp(Ity_I32
);
13009 IRTemp res32
= newTemp(Ity_I32
);
13010 assign(src32
, widenUto32(mkexpr(src
)));
13011 assign(res32
, widenUto32(mkexpr(res
)));
13013 IRTemp oszacp
= newTemp(Ity_I32
);
13019 binop(Iop_CmpEQ32
, mkexpr(res32
), mkU32(0))),
13020 mkU8(X86G_CC_SHIFT_Z
)),
13023 binop(Iop_CmpEQ32
, mkexpr(src32
), mkU32(0))),
13024 mkU8(X86G_CC_SHIFT_C
))
13028 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
13029 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
13030 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
13031 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(oszacp
) ));
13033 goto decode_success
;
13036 /* ---------------------------------------------------- */
13037 /* --- end of the SSE4 decoder --- */
13038 /* ---------------------------------------------------- */
13040 after_sse_decoders
:
13042 /* ---------------------------------------------------- */
13043 /* --- deal with misc 0x67 pfxs (addr size override) -- */
13044 /* ---------------------------------------------------- */
13046 /* 67 E3 = JCXZ (for JECXZ see below) */
13047 if (insn
[0] == 0x67 && insn
[1] == 0xE3 && sz
== 4) {
13049 d32
= (((Addr32
)guest_EIP_bbstart
)+delta
+1) + getSDisp8(delta
);
13052 binop(Iop_CmpEQ16
, getIReg(2,R_ECX
), mkU16(0)),
13057 DIP("jcxz 0x%x\n", d32
);
13058 goto decode_success
;
13061 /* 67 E8 = CALL with redundant addr16 prefix */
13062 if (insn
[0] == 0x67 && insn
[1] == 0xE8) {
13066 /* ---------------------------------------------------- */
13067 /* --- start of the baseline insn decoder -- */
13068 /* ---------------------------------------------------- */
13070 /* Get the primary opcode. */
13071 opc
= getIByte(delta
); delta
++;
13073 /* We get here if the current insn isn't SSE, or this CPU doesn't
13078 /* ------------------------ Control flow --------------- */
13080 case 0xC2: /* RET imm16 */
13081 d32
= getUDisp16(delta
);
13083 dis_ret(&dres
, d32
);
13084 DIP("ret %u\n", d32
);
13086 case 0xC3: /* RET */
13091 case 0xCF: /* IRET */
13092 /* Note, this is an extremely kludgey and limited implementation
13093 of iret. All it really does is:
13094 popl %EIP; popl %CS; popl %EFLAGS.
13095 %CS is set but ignored (as it is in (eg) popw %cs)". */
13096 t1
= newTemp(Ity_I32
); /* ESP */
13097 t2
= newTemp(Ity_I32
); /* new EIP */
13098 t3
= newTemp(Ity_I32
); /* new CS */
13099 t4
= newTemp(Ity_I32
); /* new EFLAGS */
13100 assign(t1
, getIReg(4,R_ESP
));
13101 assign(t2
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t1
),mkU32(0) )));
13102 assign(t3
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t1
),mkU32(4) )));
13103 assign(t4
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t1
),mkU32(8) )));
13104 /* Get stuff off stack */
13105 putIReg(4, R_ESP
,binop(Iop_Add32
, mkexpr(t1
), mkU32(12)));
13106 /* set %CS (which is ignored anyway) */
13107 putSReg( R_CS
, unop(Iop_32to16
, mkexpr(t3
)) );
13109 set_EFLAGS_from_value( t4
, False
/*!emit_AC_emwarn*/, 0/*unused*/ );
13110 /* goto new EIP value */
13111 jmp_treg(&dres
, Ijk_Ret
, t2
);
13112 vassert(dres
.whatNext
== Dis_StopHere
);
13113 DIP("iret (very kludgey)\n");
13116 case 0xE8: /* CALL J4 */
13117 d32
= getUDisp32(delta
); delta
+= 4;
13118 d32
+= (guest_EIP_bbstart
+delta
);
13119 /* (guest_eip_bbstart+delta) == return-to addr, d32 == call-to addr */
13120 if (d32
== guest_EIP_bbstart
+delta
&& getIByte(delta
) >= 0x58
13121 && getIByte(delta
) <= 0x5F) {
13122 /* Specially treat the position-independent-code idiom
13127 since this generates better code, but for no other reason. */
13128 Int archReg
= getIByte(delta
) - 0x58;
13129 /* vex_printf("-- fPIC thingy\n"); */
13130 putIReg(4, archReg
, mkU32(guest_EIP_bbstart
+delta
));
13131 delta
++; /* Step over the POP */
13132 DIP("call 0x%x ; popl %s\n",d32
,nameIReg(4,archReg
));
13134 /* The normal sequence for a call. */
13135 t1
= newTemp(Ity_I32
);
13136 assign(t1
, binop(Iop_Sub32
, getIReg(4,R_ESP
), mkU32(4)));
13137 putIReg(4, R_ESP
, mkexpr(t1
));
13138 storeLE( mkexpr(t1
), mkU32(guest_EIP_bbstart
+delta
));
13139 jmp_lit(&dres
, Ijk_Call
, d32
);
13140 vassert(dres
.whatNext
== Dis_StopHere
);
13141 DIP("call 0x%x\n",d32
);
13145 //-- case 0xC8: /* ENTER */
13146 //-- d32 = getUDisp16(eip); eip += 2;
13147 //-- abyte = getIByte(delta); delta++;
13149 //-- vg_assert(sz == 4);
13150 //-- vg_assert(abyte == 0);
13152 //-- t1 = newTemp(cb); t2 = newTemp(cb);
13153 //-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1);
13154 //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2);
13155 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
13156 //-- uLiteral(cb, sz);
13157 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
13158 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
13159 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP);
13161 //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
13162 //-- uLiteral(cb, d32);
13163 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
13165 //-- DIP("enter 0x%x, 0x%x", d32, abyte);
13168 case 0xC9: /* LEAVE */
13170 t1
= newTemp(Ity_I32
); t2
= newTemp(Ity_I32
);
13171 assign(t1
, getIReg(4,R_EBP
));
13172 /* First PUT ESP looks redundant, but need it because ESP must
13173 always be up-to-date for Memcheck to work... */
13174 putIReg(4, R_ESP
, mkexpr(t1
));
13175 assign(t2
, loadLE(Ity_I32
,mkexpr(t1
)));
13176 putIReg(4, R_EBP
, mkexpr(t2
));
13177 putIReg(4, R_ESP
, binop(Iop_Add32
, mkexpr(t1
), mkU32(4)) );
13181 /* ---------------- Misc weird-ass insns --------------- */
13183 case 0x27: /* DAA */
13184 case 0x2F: /* DAS */
13185 case 0x37: /* AAA */
13186 case 0x3F: /* AAS */
13187 /* An ugly implementation for some ugly instructions. Oh
13189 if (sz
!= 4) goto decode_failure
;
13190 t1
= newTemp(Ity_I32
);
13191 t2
= newTemp(Ity_I32
);
13192 /* Make up a 32-bit value (t1), with the old value of AX in the
13193 bottom 16 bits, and the old OSZACP bitmask in the upper 16
13196 binop(Iop_16HLto32
,
13198 mk_x86g_calculate_eflags_all()),
13201 /* Call the helper fn, to get a new AX and OSZACP value, and
13202 poke both back into the guest state. Also pass the helper
13203 the actual opcode so it knows which of the 4 instructions it
13204 is doing the computation for. */
13205 vassert(opc
== 0x27 || opc
== 0x2F || opc
== 0x37 || opc
== 0x3F);
13208 Ity_I32
, 0/*regparm*/, "x86g_calculate_daa_das_aaa_aas",
13209 &x86g_calculate_daa_das_aaa_aas
,
13210 mkIRExprVec_2( mkexpr(t1
), mkU32( opc
& 0xFF) )
13212 putIReg(2, R_EAX
, unop(Iop_32to16
, mkexpr(t2
) ));
13214 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
13215 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
13216 stmt( IRStmt_Put( OFFB_CC_DEP1
,
13218 binop(Iop_Shr32
, mkexpr(t2
), mkU8(16)),
13219 mkU32( X86G_CC_MASK_C
| X86G_CC_MASK_P
13220 | X86G_CC_MASK_A
| X86G_CC_MASK_Z
13221 | X86G_CC_MASK_S
| X86G_CC_MASK_O
)
13225 /* Set NDEP even though it isn't used. This makes redundant-PUT
13226 elimination of previous stores to this field work better. */
13227 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
13229 case 0x27: DIP("daa\n"); break;
13230 case 0x2F: DIP("das\n"); break;
13231 case 0x37: DIP("aaa\n"); break;
13232 case 0x3F: DIP("aas\n"); break;
13233 default: vassert(0);
13237 case 0xD4: /* AAM */
13238 case 0xD5: /* AAD */
13239 d32
= getIByte(delta
); delta
++;
13240 if (sz
!= 4 || d32
!= 10) goto decode_failure
;
13241 t1
= newTemp(Ity_I32
);
13242 t2
= newTemp(Ity_I32
);
13243 /* Make up a 32-bit value (t1), with the old value of AX in the
13244 bottom 16 bits, and the old OSZACP bitmask in the upper 16
13247 binop(Iop_16HLto32
,
13249 mk_x86g_calculate_eflags_all()),
13252 /* Call the helper fn, to get a new AX and OSZACP value, and
13253 poke both back into the guest state. Also pass the helper
13254 the actual opcode so it knows which of the 2 instructions it
13255 is doing the computation for. */
13258 Ity_I32
, 0/*regparm*/, "x86g_calculate_aad_aam",
13259 &x86g_calculate_aad_aam
,
13260 mkIRExprVec_2( mkexpr(t1
), mkU32( opc
& 0xFF) )
13262 putIReg(2, R_EAX
, unop(Iop_32to16
, mkexpr(t2
) ));
13264 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
13265 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
13266 stmt( IRStmt_Put( OFFB_CC_DEP1
,
13268 binop(Iop_Shr32
, mkexpr(t2
), mkU8(16)),
13269 mkU32( X86G_CC_MASK_C
| X86G_CC_MASK_P
13270 | X86G_CC_MASK_A
| X86G_CC_MASK_Z
13271 | X86G_CC_MASK_S
| X86G_CC_MASK_O
)
13275 /* Set NDEP even though it isn't used. This makes
13276 redundant-PUT elimination of previous stores to this field
13278 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
13280 DIP(opc
== 0xD4 ? "aam\n" : "aad\n");
13283 /* ------------------------ CWD/CDQ -------------------- */
13285 case 0x98: /* CBW */
13287 putIReg(4, R_EAX
, unop(Iop_16Sto32
, getIReg(2, R_EAX
)));
13291 putIReg(2, R_EAX
, unop(Iop_8Sto16
, getIReg(1, R_EAX
)));
13296 case 0x99: /* CWD/CDQ */
13299 binop(mkSizedOp(ty
,Iop_Sar8
),
13300 getIReg(sz
, R_EAX
),
13301 mkU8(sz
== 2 ? 15 : 31)) );
13302 DIP(sz
== 2 ? "cwdq\n" : "cdqq\n");
13305 /* ------------------------ FPU ops -------------------- */
13307 case 0x9E: /* SAHF */
13312 case 0x9F: /* LAHF */
13317 case 0x9B: /* FWAIT */
13330 Int delta0
= delta
;
13331 Bool decode_OK
= False
;
13332 delta
= dis_FPU ( &decode_OK
, sorb
, delta
);
13335 goto decode_failure
;
13340 /* ------------------------ INC & DEC ------------------ */
13342 case 0x40: /* INC eAX */
13343 case 0x41: /* INC eCX */
13344 case 0x42: /* INC eDX */
13345 case 0x43: /* INC eBX */
13346 case 0x44: /* INC eSP */
13347 case 0x45: /* INC eBP */
13348 case 0x46: /* INC eSI */
13349 case 0x47: /* INC eDI */
13350 vassert(sz
== 2 || sz
== 4);
13353 assign( t1
, binop(mkSizedOp(ty
,Iop_Add8
),
13354 getIReg(sz
, (UInt
)(opc
- 0x40)),
13356 setFlags_INC_DEC( True
, t1
, ty
);
13357 putIReg(sz
, (UInt
)(opc
- 0x40), mkexpr(t1
));
13358 DIP("inc%c %s\n", nameISize(sz
), nameIReg(sz
,opc
-0x40));
13361 case 0x48: /* DEC eAX */
13362 case 0x49: /* DEC eCX */
13363 case 0x4A: /* DEC eDX */
13364 case 0x4B: /* DEC eBX */
13365 case 0x4C: /* DEC eSP */
13366 case 0x4D: /* DEC eBP */
13367 case 0x4E: /* DEC eSI */
13368 case 0x4F: /* DEC eDI */
13369 vassert(sz
== 2 || sz
== 4);
13372 assign( t1
, binop(mkSizedOp(ty
,Iop_Sub8
),
13373 getIReg(sz
, (UInt
)(opc
- 0x48)),
13375 setFlags_INC_DEC( False
, t1
, ty
);
13376 putIReg(sz
, (UInt
)(opc
- 0x48), mkexpr(t1
));
13377 DIP("dec%c %s\n", nameISize(sz
), nameIReg(sz
,opc
-0x48));
13380 /* ------------------------ INT ------------------------ */
13382 case 0xCC: /* INT 3 */
13383 jmp_lit(&dres
, Ijk_SigTRAP
, ((Addr32
)guest_EIP_bbstart
)+delta
);
13384 vassert(dres
.whatNext
== Dis_StopHere
);
13388 case 0xCD: /* INT imm8 */
13389 d32
= getIByte(delta
); delta
++;
13391 /* For any of the cases where we emit a jump (that is, for all
13392 currently handled cases), it's important that all ArchRegs
13393 carry their up-to-date value at this point. So we declare an
13394 end-of-block here, which forces any TempRegs caching ArchRegs
13397 /* Handle int $0x3F .. $0x4F by synthesising a segfault and a
13398 restart of this instruction (hence the "-2" two lines below,
13399 to get the restart EIP to be this instruction. This is
13400 probably Linux-specific and it would be more correct to only
13401 do this if the VexAbiInfo says that is what we should do.
13402 This used to handle just 0x40-0x43; Jikes RVM uses a larger
13403 range (0x3F-0x49), and this allows some slack as well. */
13404 if (d32
>= 0x3F && d32
<= 0x4F) {
13405 jmp_lit(&dres
, Ijk_SigSEGV
, ((Addr32
)guest_EIP_bbstart
)+delta
-2);
13406 vassert(dres
.whatNext
== Dis_StopHere
);
13407 DIP("int $0x%x\n", d32
);
13411 /* Handle int $0x80 (linux syscalls), int $0x81 and $0x82
13412 (darwin syscalls), int $0x91 (Solaris syscalls) and int $0xD2
13413 (Solaris fasttrap syscalls). As part of this, note where we are, so we
13414 can back up the guest to this point if the syscall needs to
13416 IRJumpKind jump_kind
;
13419 jump_kind
= Ijk_Sys_int128
;
13422 jump_kind
= Ijk_Sys_int129
;
13425 jump_kind
= Ijk_Sys_int130
;
13428 jump_kind
= Ijk_Sys_int145
;
13431 jump_kind
= Ijk_Sys_int210
;
13434 /* none of the above */
13435 goto decode_failure
;
13438 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL
,
13439 mkU32(guest_EIP_curr_instr
) ) );
13440 jmp_lit(&dres
, jump_kind
, ((Addr32
)guest_EIP_bbstart
)+delta
);
13441 vassert(dres
.whatNext
== Dis_StopHere
);
13442 DIP("int $0x%x\n", d32
);
13445 /* ------------------------ Jcond, byte offset --------- */
13447 case 0xEB: /* Jb (jump, byte offset) */
13448 d32
= (((Addr32
)guest_EIP_bbstart
)+delta
+1) + getSDisp8(delta
);
13450 jmp_lit(&dres
, Ijk_Boring
, d32
);
13451 vassert(dres
.whatNext
== Dis_StopHere
);
13452 DIP("jmp-8 0x%x\n", d32
);
13455 case 0xE9: /* Jv (jump, 16/32 offset) */
13456 vassert(sz
== 4); /* JRS added 2004 July 11 */
13457 d32
= (((Addr32
)guest_EIP_bbstart
)+delta
+sz
) + getSDisp(sz
,delta
);
13459 jmp_lit(&dres
, Ijk_Boring
, d32
);
13460 vassert(dres
.whatNext
== Dis_StopHere
);
13461 DIP("jmp 0x%x\n", d32
);
13466 case 0x72: /* JBb/JNAEb (jump below) */
13467 case 0x73: /* JNBb/JAEb (jump not below) */
13468 case 0x74: /* JZb/JEb (jump zero) */
13469 case 0x75: /* JNZb/JNEb (jump not zero) */
13470 case 0x76: /* JBEb/JNAb (jump below or equal) */
13471 case 0x77: /* JNBEb/JAb (jump not below or equal) */
13472 case 0x78: /* JSb (jump negative) */
13473 case 0x79: /* JSb (jump not negative) */
13474 case 0x7A: /* JP (jump parity even) */
13475 case 0x7B: /* JNP/JPO (jump parity odd) */
13476 case 0x7C: /* JLb/JNGEb (jump less) */
13477 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
13478 case 0x7E: /* JLEb/JNGb (jump less or equal) */
13479 case 0x7F: /* JGb/JNLEb (jump greater) */
13481 const HChar
* comment
= "";
13482 jmpDelta
= (Int
)getSDisp8(delta
);
13483 vassert(-128 <= jmpDelta
&& jmpDelta
< 128);
13484 d32
= (((Addr32
)guest_EIP_bbstart
)+delta
+1) + jmpDelta
;
13486 /* End the block at this point. */
13487 jcc_01( &dres
, (X86Condcode
)(opc
- 0x70),
13488 (Addr32
)(guest_EIP_bbstart
+delta
), d32
);
13489 vassert(dres
.whatNext
== Dis_StopHere
);
13490 DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc
- 0x70), d32
, comment
);
13494 case 0xE3: /* JECXZ (for JCXZ see above) */
13495 if (sz
!= 4) goto decode_failure
;
13496 d32
= (((Addr32
)guest_EIP_bbstart
)+delta
+1) + getSDisp8(delta
);
13499 binop(Iop_CmpEQ32
, getIReg(4,R_ECX
), mkU32(0)),
13504 DIP("jecxz 0x%x\n", d32
);
13507 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
13508 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
13509 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
13510 { /* Again, the docs say this uses ECX/CX as a count depending on
13511 the address size override, not the operand one. Since we
13512 don't handle address size overrides, I guess that means
13514 IRExpr
* zbit
= NULL
;
13515 IRExpr
* count
= NULL
;
13516 IRExpr
* cond
= NULL
;
13517 const HChar
* xtra
= NULL
;
13519 if (sz
!= 4) goto decode_failure
;
13520 d32
= (((Addr32
)guest_EIP_bbstart
)+delta
+1) + getSDisp8(delta
);
13522 putIReg(4, R_ECX
, binop(Iop_Sub32
, getIReg(4,R_ECX
), mkU32(1)));
13524 count
= getIReg(4,R_ECX
);
13525 cond
= binop(Iop_CmpNE32
, count
, mkU32(0));
13532 zbit
= mk_x86g_calculate_condition( X86CondZ
);
13533 cond
= mkAnd1(cond
, zbit
);
13537 zbit
= mk_x86g_calculate_condition( X86CondNZ
);
13538 cond
= mkAnd1(cond
, zbit
);
13543 stmt( IRStmt_Exit(cond
, Ijk_Boring
, IRConst_U32(d32
), OFFB_EIP
) );
13545 DIP("loop%s 0x%x\n", xtra
, d32
);
13549 /* ------------------------ IMUL ----------------------- */
13551 case 0x69: /* IMUL Iv, Ev, Gv */
13552 delta
= dis_imul_I_E_G ( sorb
, sz
, delta
, sz
);
13554 case 0x6B: /* IMUL Ib, Ev, Gv */
13555 delta
= dis_imul_I_E_G ( sorb
, sz
, delta
, 1 );
13558 /* ------------------------ MOV ------------------------ */
13560 case 0x88: /* MOV Gb,Eb */
13561 delta
= dis_mov_G_E(sorb
, 1, delta
);
13564 case 0x89: /* MOV Gv,Ev */
13565 delta
= dis_mov_G_E(sorb
, sz
, delta
);
13568 case 0x8A: /* MOV Eb,Gb */
13569 delta
= dis_mov_E_G(sorb
, 1, delta
);
13572 case 0x8B: /* MOV Ev,Gv */
13573 delta
= dis_mov_E_G(sorb
, sz
, delta
);
13576 case 0x8D: /* LEA M,Gv */
13578 goto decode_failure
;
13579 modrm
= getIByte(delta
);
13580 if (epartIsReg(modrm
))
13581 goto decode_failure
;
13582 /* NOTE! this is the one place where a segment override prefix
13583 has no effect on the address calculation. Therefore we pass
13584 zero instead of sorb here. */
13585 addr
= disAMode ( &alen
, /*sorb*/ 0, delta
, dis_buf
);
13587 putIReg(sz
, gregOfRM(modrm
), mkexpr(addr
));
13588 DIP("lea%c %s, %s\n", nameISize(sz
), dis_buf
,
13589 nameIReg(sz
,gregOfRM(modrm
)));
13592 case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */
13593 delta
= dis_mov_Sw_Ew(sorb
, sz
, delta
);
13596 case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */
13597 delta
= dis_mov_Ew_Sw(sorb
, delta
);
13600 case 0xA0: /* MOV Ob,AL */
13602 /* Fall through ... */
13603 case 0xA1: /* MOV Ov,eAX */
13604 d32
= getUDisp32(delta
); delta
+= 4;
13606 addr
= newTemp(Ity_I32
);
13607 assign( addr
, handleSegOverride(sorb
, mkU32(d32
)) );
13608 putIReg(sz
, R_EAX
, loadLE(ty
, mkexpr(addr
)));
13609 DIP("mov%c %s0x%x, %s\n", nameISize(sz
), sorbTxt(sorb
),
13610 d32
, nameIReg(sz
,R_EAX
));
13613 case 0xA2: /* MOV Ob,AL */
13615 /* Fall through ... */
13616 case 0xA3: /* MOV eAX,Ov */
13617 d32
= getUDisp32(delta
); delta
+= 4;
13619 addr
= newTemp(Ity_I32
);
13620 assign( addr
, handleSegOverride(sorb
, mkU32(d32
)) );
13621 storeLE( mkexpr(addr
), getIReg(sz
,R_EAX
) );
13622 DIP("mov%c %s, %s0x%x\n", nameISize(sz
), nameIReg(sz
,R_EAX
),
13623 sorbTxt(sorb
), d32
);
13626 case 0xB0: /* MOV imm,AL */
13627 case 0xB1: /* MOV imm,CL */
13628 case 0xB2: /* MOV imm,DL */
13629 case 0xB3: /* MOV imm,BL */
13630 case 0xB4: /* MOV imm,AH */
13631 case 0xB5: /* MOV imm,CH */
13632 case 0xB6: /* MOV imm,DH */
13633 case 0xB7: /* MOV imm,BH */
13634 d32
= getIByte(delta
); delta
+= 1;
13635 putIReg(1, opc
-0xB0, mkU8(d32
));
13636 DIP("movb $0x%x,%s\n", d32
, nameIReg(1,opc
-0xB0));
13639 case 0xB8: /* MOV imm,eAX */
13640 case 0xB9: /* MOV imm,eCX */
13641 case 0xBA: /* MOV imm,eDX */
13642 case 0xBB: /* MOV imm,eBX */
13643 case 0xBC: /* MOV imm,eSP */
13644 case 0xBD: /* MOV imm,eBP */
13645 case 0xBE: /* MOV imm,eSI */
13646 case 0xBF: /* MOV imm,eDI */
13647 d32
= getUDisp(sz
,delta
); delta
+= sz
;
13648 putIReg(sz
, opc
-0xB8, mkU(szToITy(sz
), d32
));
13649 DIP("mov%c $0x%x,%s\n", nameISize(sz
), d32
, nameIReg(sz
,opc
-0xB8));
13652 case 0xC6: /* C6 /0 = MOV Ib,Eb */
13654 goto maybe_do_Mov_I_E
;
13655 case 0xC7: /* C7 /0 = MOV Iv,Ev */
13656 goto maybe_do_Mov_I_E
;
13659 modrm
= getIByte(delta
);
13660 if (gregOfRM(modrm
) == 0) {
13661 if (epartIsReg(modrm
)) {
13662 delta
++; /* mod/rm byte */
13663 d32
= getUDisp(sz
,delta
); delta
+= sz
;
13664 putIReg(sz
, eregOfRM(modrm
), mkU(szToITy(sz
), d32
));
13665 DIP("mov%c $0x%x, %s\n", nameISize(sz
), d32
,
13666 nameIReg(sz
,eregOfRM(modrm
)));
13668 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
13670 d32
= getUDisp(sz
,delta
); delta
+= sz
;
13671 storeLE(mkexpr(addr
), mkU(szToITy(sz
), d32
));
13672 DIP("mov%c $0x%x, %s\n", nameISize(sz
), d32
, dis_buf
);
13676 goto decode_failure
;
13678 /* ------------------------ opl imm, A ----------------- */
13680 case 0x04: /* ADD Ib, AL */
13681 delta
= dis_op_imm_A( 1, False
, Iop_Add8
, True
, delta
, "add" );
13683 case 0x05: /* ADD Iv, eAX */
13684 delta
= dis_op_imm_A( sz
, False
, Iop_Add8
, True
, delta
, "add" );
13687 case 0x0C: /* OR Ib, AL */
13688 delta
= dis_op_imm_A( 1, False
, Iop_Or8
, True
, delta
, "or" );
13690 case 0x0D: /* OR Iv, eAX */
13691 delta
= dis_op_imm_A( sz
, False
, Iop_Or8
, True
, delta
, "or" );
13694 case 0x14: /* ADC Ib, AL */
13695 delta
= dis_op_imm_A( 1, True
, Iop_Add8
, True
, delta
, "adc" );
13697 case 0x15: /* ADC Iv, eAX */
13698 delta
= dis_op_imm_A( sz
, True
, Iop_Add8
, True
, delta
, "adc" );
13701 case 0x1C: /* SBB Ib, AL */
13702 delta
= dis_op_imm_A( 1, True
, Iop_Sub8
, True
, delta
, "sbb" );
13704 case 0x1D: /* SBB Iv, eAX */
13705 delta
= dis_op_imm_A( sz
, True
, Iop_Sub8
, True
, delta
, "sbb" );
13708 case 0x24: /* AND Ib, AL */
13709 delta
= dis_op_imm_A( 1, False
, Iop_And8
, True
, delta
, "and" );
13711 case 0x25: /* AND Iv, eAX */
13712 delta
= dis_op_imm_A( sz
, False
, Iop_And8
, True
, delta
, "and" );
13715 case 0x2C: /* SUB Ib, AL */
13716 delta
= dis_op_imm_A( 1, False
, Iop_Sub8
, True
, delta
, "sub" );
13718 case 0x2D: /* SUB Iv, eAX */
13719 delta
= dis_op_imm_A( sz
, False
, Iop_Sub8
, True
, delta
, "sub" );
13722 case 0x34: /* XOR Ib, AL */
13723 delta
= dis_op_imm_A( 1, False
, Iop_Xor8
, True
, delta
, "xor" );
13725 case 0x35: /* XOR Iv, eAX */
13726 delta
= dis_op_imm_A( sz
, False
, Iop_Xor8
, True
, delta
, "xor" );
13729 case 0x3C: /* CMP Ib, AL */
13730 delta
= dis_op_imm_A( 1, False
, Iop_Sub8
, False
, delta
, "cmp" );
13732 case 0x3D: /* CMP Iv, eAX */
13733 delta
= dis_op_imm_A( sz
, False
, Iop_Sub8
, False
, delta
, "cmp" );
13736 case 0xA8: /* TEST Ib, AL */
13737 delta
= dis_op_imm_A( 1, False
, Iop_And8
, False
, delta
, "test" );
13739 case 0xA9: /* TEST Iv, eAX */
13740 delta
= dis_op_imm_A( sz
, False
, Iop_And8
, False
, delta
, "test" );
13743 /* ------------------------ opl Ev, Gv ----------------- */
13745 case 0x02: /* ADD Eb,Gb */
13746 delta
= dis_op2_E_G ( sorb
, False
, Iop_Add8
, True
, 1, delta
, "add" );
13748 case 0x03: /* ADD Ev,Gv */
13749 delta
= dis_op2_E_G ( sorb
, False
, Iop_Add8
, True
, sz
, delta
, "add" );
13752 case 0x0A: /* OR Eb,Gb */
13753 delta
= dis_op2_E_G ( sorb
, False
, Iop_Or8
, True
, 1, delta
, "or" );
13755 case 0x0B: /* OR Ev,Gv */
13756 delta
= dis_op2_E_G ( sorb
, False
, Iop_Or8
, True
, sz
, delta
, "or" );
13759 case 0x12: /* ADC Eb,Gb */
13760 delta
= dis_op2_E_G ( sorb
, True
, Iop_Add8
, True
, 1, delta
, "adc" );
13762 case 0x13: /* ADC Ev,Gv */
13763 delta
= dis_op2_E_G ( sorb
, True
, Iop_Add8
, True
, sz
, delta
, "adc" );
13766 case 0x1A: /* SBB Eb,Gb */
13767 delta
= dis_op2_E_G ( sorb
, True
, Iop_Sub8
, True
, 1, delta
, "sbb" );
13769 case 0x1B: /* SBB Ev,Gv */
13770 delta
= dis_op2_E_G ( sorb
, True
, Iop_Sub8
, True
, sz
, delta
, "sbb" );
13773 case 0x22: /* AND Eb,Gb */
13774 delta
= dis_op2_E_G ( sorb
, False
, Iop_And8
, True
, 1, delta
, "and" );
13776 case 0x23: /* AND Ev,Gv */
13777 delta
= dis_op2_E_G ( sorb
, False
, Iop_And8
, True
, sz
, delta
, "and" );
13780 case 0x2A: /* SUB Eb,Gb */
13781 delta
= dis_op2_E_G ( sorb
, False
, Iop_Sub8
, True
, 1, delta
, "sub" );
13783 case 0x2B: /* SUB Ev,Gv */
13784 delta
= dis_op2_E_G ( sorb
, False
, Iop_Sub8
, True
, sz
, delta
, "sub" );
13787 case 0x32: /* XOR Eb,Gb */
13788 delta
= dis_op2_E_G ( sorb
, False
, Iop_Xor8
, True
, 1, delta
, "xor" );
13790 case 0x33: /* XOR Ev,Gv */
13791 delta
= dis_op2_E_G ( sorb
, False
, Iop_Xor8
, True
, sz
, delta
, "xor" );
13794 case 0x3A: /* CMP Eb,Gb */
13795 delta
= dis_op2_E_G ( sorb
, False
, Iop_Sub8
, False
, 1, delta
, "cmp" );
13797 case 0x3B: /* CMP Ev,Gv */
13798 delta
= dis_op2_E_G ( sorb
, False
, Iop_Sub8
, False
, sz
, delta
, "cmp" );
13801 case 0x84: /* TEST Eb,Gb */
13802 delta
= dis_op2_E_G ( sorb
, False
, Iop_And8
, False
, 1, delta
, "test" );
13804 case 0x85: /* TEST Ev,Gv */
13805 delta
= dis_op2_E_G ( sorb
, False
, Iop_And8
, False
, sz
, delta
, "test" );
13808 /* ------------------------ opl Gv, Ev ----------------- */
13810 case 0x00: /* ADD Gb,Eb */
13811 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13812 Iop_Add8
, True
, 1, delta
, "add" );
13814 case 0x01: /* ADD Gv,Ev */
13815 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13816 Iop_Add8
, True
, sz
, delta
, "add" );
13819 case 0x08: /* OR Gb,Eb */
13820 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13821 Iop_Or8
, True
, 1, delta
, "or" );
13823 case 0x09: /* OR Gv,Ev */
13824 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13825 Iop_Or8
, True
, sz
, delta
, "or" );
13828 case 0x10: /* ADC Gb,Eb */
13829 delta
= dis_op2_G_E ( sorb
, pfx_lock
, True
,
13830 Iop_Add8
, True
, 1, delta
, "adc" );
13832 case 0x11: /* ADC Gv,Ev */
13833 delta
= dis_op2_G_E ( sorb
, pfx_lock
, True
,
13834 Iop_Add8
, True
, sz
, delta
, "adc" );
13837 case 0x18: /* SBB Gb,Eb */
13838 delta
= dis_op2_G_E ( sorb
, pfx_lock
, True
,
13839 Iop_Sub8
, True
, 1, delta
, "sbb" );
13841 case 0x19: /* SBB Gv,Ev */
13842 delta
= dis_op2_G_E ( sorb
, pfx_lock
, True
,
13843 Iop_Sub8
, True
, sz
, delta
, "sbb" );
13846 case 0x20: /* AND Gb,Eb */
13847 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13848 Iop_And8
, True
, 1, delta
, "and" );
13850 case 0x21: /* AND Gv,Ev */
13851 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13852 Iop_And8
, True
, sz
, delta
, "and" );
13855 case 0x28: /* SUB Gb,Eb */
13856 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13857 Iop_Sub8
, True
, 1, delta
, "sub" );
13859 case 0x29: /* SUB Gv,Ev */
13860 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13861 Iop_Sub8
, True
, sz
, delta
, "sub" );
13864 case 0x30: /* XOR Gb,Eb */
13865 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13866 Iop_Xor8
, True
, 1, delta
, "xor" );
13868 case 0x31: /* XOR Gv,Ev */
13869 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13870 Iop_Xor8
, True
, sz
, delta
, "xor" );
13873 case 0x38: /* CMP Gb,Eb */
13874 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13875 Iop_Sub8
, False
, 1, delta
, "cmp" );
13877 case 0x39: /* CMP Gv,Ev */
13878 delta
= dis_op2_G_E ( sorb
, pfx_lock
, False
,
13879 Iop_Sub8
, False
, sz
, delta
, "cmp" );
13882 /* ------------------------ POP ------------------------ */
13884 case 0x58: /* POP eAX */
13885 case 0x59: /* POP eCX */
13886 case 0x5A: /* POP eDX */
13887 case 0x5B: /* POP eBX */
13888 case 0x5D: /* POP eBP */
13889 case 0x5E: /* POP eSI */
13890 case 0x5F: /* POP eDI */
13891 case 0x5C: /* POP eSP */
13892 vassert(sz
== 2 || sz
== 4);
13893 t1
= newTemp(szToITy(sz
)); t2
= newTemp(Ity_I32
);
13894 assign(t2
, getIReg(4, R_ESP
));
13895 assign(t1
, loadLE(szToITy(sz
),mkexpr(t2
)));
13896 putIReg(4, R_ESP
, binop(Iop_Add32
, mkexpr(t2
), mkU32(sz
)));
13897 putIReg(sz
, opc
-0x58, mkexpr(t1
));
13898 DIP("pop%c %s\n", nameISize(sz
), nameIReg(sz
,opc
-0x58));
13901 case 0x9D: /* POPF */
13902 vassert(sz
== 2 || sz
== 4);
13903 t1
= newTemp(Ity_I32
); t2
= newTemp(Ity_I32
);
13904 assign(t2
, getIReg(4, R_ESP
));
13905 assign(t1
, widenUto32(loadLE(szToITy(sz
),mkexpr(t2
))));
13906 putIReg(4, R_ESP
, binop(Iop_Add32
, mkexpr(t2
), mkU32(sz
)));
13908 /* Generate IR to set %EFLAGS{O,S,Z,A,C,P,D,ID,AC} from the
13910 set_EFLAGS_from_value( t1
, True
/*emit_AC_emwarn*/,
13911 ((Addr32
)guest_EIP_bbstart
)+delta
);
13913 DIP("popf%c\n", nameISize(sz
));
13916 case 0x61: /* POPA */
13917 /* This is almost certainly wrong for sz==2. So ... */
13918 if (sz
!= 4) goto decode_failure
;
13920 /* t5 is the old %ESP value. */
13921 t5
= newTemp(Ity_I32
);
13922 assign( t5
, getIReg(4, R_ESP
) );
13924 /* Reload all the registers, except %esp. */
13925 putIReg(4,R_EAX
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t5
),mkU32(28)) ));
13926 putIReg(4,R_ECX
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t5
),mkU32(24)) ));
13927 putIReg(4,R_EDX
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t5
),mkU32(20)) ));
13928 putIReg(4,R_EBX
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t5
),mkU32(16)) ));
13929 /* ignore saved %ESP */
13930 putIReg(4,R_EBP
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t5
),mkU32( 8)) ));
13931 putIReg(4,R_ESI
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t5
),mkU32( 4)) ));
13932 putIReg(4,R_EDI
, loadLE(Ity_I32
, binop(Iop_Add32
,mkexpr(t5
),mkU32( 0)) ));
13934 /* and move %ESP back up */
13935 putIReg( 4, R_ESP
, binop(Iop_Add32
, mkexpr(t5
), mkU32(8*4)) );
13937 DIP("popa%c\n", nameISize(sz
));
13940 case 0x8F: /* POPL/POPW m32 */
13942 UChar rm
= getIByte(delta
);
13944 /* make sure this instruction is correct POP */
13945 if (epartIsReg(rm
) || gregOfRM(rm
) != 0)
13946 goto decode_failure
;
13947 /* and has correct size */
13948 if (sz
!= 4 && sz
!= 2)
13949 goto decode_failure
;
13952 t1
= newTemp(Ity_I32
); /* stack address */
13953 t3
= newTemp(ty
); /* data */
13954 /* set t1 to ESP: t1 = ESP */
13955 assign( t1
, getIReg(4, R_ESP
) );
13956 /* load M[ESP] to virtual register t3: t3 = M[t1] */
13957 assign( t3
, loadLE(ty
, mkexpr(t1
)) );
13959 /* increase ESP; must be done before the STORE. Intel manual says:
13960 If the ESP register is used as a base register for addressing
13961 a destination operand in memory, the POP instruction computes
13962 the effective address of the operand after it increments the
13965 putIReg(4, R_ESP
, binop(Iop_Add32
, mkexpr(t1
), mkU32(sz
)) );
13967 /* resolve MODR/M */
13968 addr
= disAMode ( &len
, sorb
, delta
, dis_buf
);
13969 storeLE( mkexpr(addr
), mkexpr(t3
) );
13971 DIP("pop%c %s\n", sz
==2 ? 'w' : 'l', dis_buf
);
13977 case 0x1F: /* POP %DS */
13978 dis_pop_segreg( R_DS
, sz
); break;
13979 case 0x07: /* POP %ES */
13980 dis_pop_segreg( R_ES
, sz
); break;
13981 case 0x17: /* POP %SS */
13982 dis_pop_segreg( R_SS
, sz
); break;
13984 /* ------------------------ PUSH ----------------------- */
13986 case 0x50: /* PUSH eAX */
13987 case 0x51: /* PUSH eCX */
13988 case 0x52: /* PUSH eDX */
13989 case 0x53: /* PUSH eBX */
13990 case 0x55: /* PUSH eBP */
13991 case 0x56: /* PUSH eSI */
13992 case 0x57: /* PUSH eDI */
13993 case 0x54: /* PUSH eSP */
13994 /* This is the Right Way, in that the value to be pushed is
13995 established before %esp is changed, so that pushl %esp
13996 correctly pushes the old value. */
13997 vassert(sz
== 2 || sz
== 4);
13998 ty
= sz
==2 ? Ity_I16
: Ity_I32
;
13999 t1
= newTemp(ty
); t2
= newTemp(Ity_I32
);
14000 assign(t1
, getIReg(sz
, opc
-0x50));
14001 assign(t2
, binop(Iop_Sub32
, getIReg(4, R_ESP
), mkU32(sz
)));
14002 putIReg(4, R_ESP
, mkexpr(t2
) );
14003 storeLE(mkexpr(t2
),mkexpr(t1
));
14004 DIP("push%c %s\n", nameISize(sz
), nameIReg(sz
,opc
-0x50));
14008 case 0x68: /* PUSH Iv */
14009 d32
= getUDisp(sz
,delta
); delta
+= sz
;
14011 case 0x6A: /* PUSH Ib, sign-extended to sz */
14012 d32
= getSDisp8(delta
); delta
+= 1;
14016 t1
= newTemp(Ity_I32
); t2
= newTemp(ty
);
14017 assign( t1
, binop(Iop_Sub32
,getIReg(4,R_ESP
),mkU32(sz
)) );
14018 putIReg(4, R_ESP
, mkexpr(t1
) );
14019 /* stop mkU16 asserting if d32 is a negative 16-bit number
14023 storeLE( mkexpr(t1
), mkU(ty
,d32
) );
14024 DIP("push%c $0x%x\n", nameISize(sz
), d32
);
14027 case 0x9C: /* PUSHF */ {
14028 vassert(sz
== 2 || sz
== 4);
14030 t1
= newTemp(Ity_I32
);
14031 assign( t1
, binop(Iop_Sub32
,getIReg(4,R_ESP
),mkU32(sz
)) );
14032 putIReg(4, R_ESP
, mkexpr(t1
) );
14034 /* Calculate OSZACP, and patch in fixed fields as per
14036 - bit 1 is always 1
14037 - bit 9 is Interrupt Enable (should always be 1 in user mode?)
14039 t2
= newTemp(Ity_I32
);
14040 assign( t2
, binop(Iop_Or32
,
14041 mk_x86g_calculate_eflags_all(),
14042 mkU32( (1<<1)|(1<<9) ) ));
14044 /* Patch in the D flag. This can simply be a copy of bit 10 of
14045 baseBlock[OFFB_DFLAG]. */
14046 t3
= newTemp(Ity_I32
);
14047 assign( t3
, binop(Iop_Or32
,
14050 IRExpr_Get(OFFB_DFLAG
,Ity_I32
),
14054 /* And patch in the ID flag. */
14055 t4
= newTemp(Ity_I32
);
14056 assign( t4
, binop(Iop_Or32
,
14059 binop(Iop_Shl32
, IRExpr_Get(OFFB_IDFLAG
,Ity_I32
),
14064 /* And patch in the AC flag. */
14065 t5
= newTemp(Ity_I32
);
14066 assign( t5
, binop(Iop_Or32
,
14069 binop(Iop_Shl32
, IRExpr_Get(OFFB_ACFLAG
,Ity_I32
),
14074 /* if sz==2, the stored value needs to be narrowed. */
14076 storeLE( mkexpr(t1
), unop(Iop_32to16
,mkexpr(t5
)) );
14078 storeLE( mkexpr(t1
), mkexpr(t5
) );
14080 DIP("pushf%c\n", nameISize(sz
));
14084 case 0x60: /* PUSHA */
14085 /* This is almost certainly wrong for sz==2. So ... */
14086 if (sz
!= 4) goto decode_failure
;
14088 /* This is the Right Way, in that the value to be pushed is
14089 established before %esp is changed, so that pusha
14090 correctly pushes the old %esp value. New value of %esp is
14091 pushed at start. */
14092 /* t0 is the %ESP value we're going to push. */
14093 t0
= newTemp(Ity_I32
);
14094 assign( t0
, getIReg(4, R_ESP
) );
14096 /* t5 will be the new %ESP value. */
14097 t5
= newTemp(Ity_I32
);
14098 assign( t5
, binop(Iop_Sub32
, mkexpr(t0
), mkU32(8*4)) );
14100 /* Update guest state before prodding memory. */
14101 putIReg(4, R_ESP
, mkexpr(t5
));
14103 /* Dump all the registers. */
14104 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32(28)), getIReg(4,R_EAX
) );
14105 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32(24)), getIReg(4,R_ECX
) );
14106 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32(20)), getIReg(4,R_EDX
) );
14107 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32(16)), getIReg(4,R_EBX
) );
14108 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32(12)), mkexpr(t0
) /*esp*/);
14109 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32( 8)), getIReg(4,R_EBP
) );
14110 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32( 4)), getIReg(4,R_ESI
) );
14111 storeLE( binop(Iop_Add32
,mkexpr(t5
),mkU32( 0)), getIReg(4,R_EDI
) );
14113 DIP("pusha%c\n", nameISize(sz
));
14116 case 0x0E: /* PUSH %CS */
14117 dis_push_segreg( R_CS
, sz
); break;
14118 case 0x1E: /* PUSH %DS */
14119 dis_push_segreg( R_DS
, sz
); break;
14120 case 0x06: /* PUSH %ES */
14121 dis_push_segreg( R_ES
, sz
); break;
14122 case 0x16: /* PUSH %SS */
14123 dis_push_segreg( R_SS
, sz
); break;
14125 /* ------------------------ SCAS et al ----------------- */
14127 case 0xA4: /* MOVS, no REP prefix */
14130 goto decode_failure
; /* else dis_string_op asserts */
14131 dis_string_op( dis_MOVS
, ( opc
== 0xA4 ? 1 : sz
), "movs", sorb
);
14134 case 0xA6: /* CMPSb, no REP prefix */
14137 goto decode_failure
; /* else dis_string_op asserts */
14138 dis_string_op( dis_CMPS
, ( opc
== 0xA6 ? 1 : sz
), "cmps", sorb
);
14141 case 0xAA: /* STOS, no REP prefix */
14144 goto decode_failure
; /* else dis_string_op asserts */
14145 dis_string_op( dis_STOS
, ( opc
== 0xAA ? 1 : sz
), "stos", sorb
);
14148 case 0xAC: /* LODS, no REP prefix */
14151 goto decode_failure
; /* else dis_string_op asserts */
14152 dis_string_op( dis_LODS
, ( opc
== 0xAC ? 1 : sz
), "lods", sorb
);
14155 case 0xAE: /* SCAS, no REP prefix */
14158 goto decode_failure
; /* else dis_string_op asserts */
14159 dis_string_op( dis_SCAS
, ( opc
== 0xAE ? 1 : sz
), "scas", sorb
);
14163 case 0xFC: /* CLD */
14164 stmt( IRStmt_Put( OFFB_DFLAG
, mkU32(1)) );
14168 case 0xFD: /* STD */
14169 stmt( IRStmt_Put( OFFB_DFLAG
, mkU32(0xFFFFFFFF)) );
14173 case 0xF8: /* CLC */
14174 case 0xF9: /* STC */
14175 case 0xF5: /* CMC */
14176 t0
= newTemp(Ity_I32
);
14177 t1
= newTemp(Ity_I32
);
14178 assign( t0
, mk_x86g_calculate_eflags_all() );
14181 assign( t1
, binop(Iop_And32
, mkexpr(t0
),
14182 mkU32(~X86G_CC_MASK_C
)));
14186 assign( t1
, binop(Iop_Or32
, mkexpr(t0
),
14187 mkU32(X86G_CC_MASK_C
)));
14191 assign( t1
, binop(Iop_Xor32
, mkexpr(t0
),
14192 mkU32(X86G_CC_MASK_C
)));
14196 vpanic("disInstr(x86)(clc/stc/cmc)");
14198 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
14199 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
14200 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(t1
) ));
14201 /* Set NDEP even though it isn't used. This makes redundant-PUT
14202 elimination of previous stores to this field work better. */
14203 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
14206 case 0xD6: /* SALC */
14207 t0
= newTemp(Ity_I32
);
14208 t1
= newTemp(Ity_I32
);
14209 assign( t0
, binop(Iop_And32
,
14210 mk_x86g_calculate_eflags_c(),
14212 assign( t1
, binop(Iop_Sar32
,
14213 binop(Iop_Shl32
, mkexpr(t0
), mkU8(31)),
14215 putIReg(1, R_EAX
, unop(Iop_32to8
, mkexpr(t1
)) );
14219 /* REPNE prefix insn */
14221 Addr32 eip_orig
= guest_EIP_bbstart
+ delta_start
;
14222 if (sorb
!= 0) goto decode_failure
;
14223 abyte
= getIByte(delta
); delta
++;
14225 if (abyte
== 0x66) { sz
= 2; abyte
= getIByte(delta
); delta
++; }
14228 /* According to the Intel manual, "repne movs" should never occur, but
14229 * in practice it has happened, so allow for it here... */
14230 case 0xA4: sz
= 1; /* REPNE MOVS<sz> fallthrough */
14232 dis_REP_op ( &dres
, X86CondNZ
, dis_MOVS
, sz
, eip_orig
,
14233 guest_EIP_bbstart
+delta
, "repne movs" );
14236 case 0xA6: sz
= 1; /* REPNE CMP<sz> fallthrough */
14238 dis_REP_op ( &dres
, X86CondNZ
, dis_CMPS
, sz
, eip_orig
,
14239 guest_EIP_bbstart
+delta
, "repne cmps" );
14242 case 0xAA: sz
= 1; /* REPNE STOS<sz> fallthrough */
14244 dis_REP_op ( &dres
, X86CondNZ
, dis_STOS
, sz
, eip_orig
,
14245 guest_EIP_bbstart
+delta
, "repne stos" );
14248 case 0xAE: sz
= 1; /* REPNE SCAS<sz> fallthrough */
14250 dis_REP_op ( &dres
, X86CondNZ
, dis_SCAS
, sz
, eip_orig
,
14251 guest_EIP_bbstart
+delta
, "repne scas" );
14255 goto decode_failure
;
14260 /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE,
14261 for the rest, it means REP) */
14263 Addr32 eip_orig
= guest_EIP_bbstart
+ delta_start
;
14264 abyte
= getIByte(delta
); delta
++;
14266 if (abyte
== 0x66) { sz
= 2; abyte
= getIByte(delta
); delta
++; }
14268 if (sorb
!= 0 && abyte
!= 0x0F) goto decode_failure
;
14272 switch (getIByte(delta
)) {
14273 /* On older CPUs, TZCNT behaves the same as BSF. */
14274 case 0xBC: /* REP BSF Gv,Ev */
14275 delta
= dis_bs_E_G ( sorb
, sz
, delta
+ 1, True
);
14277 /* On older CPUs, LZCNT behaves the same as BSR. */
14278 case 0xBD: /* REP BSR Gv,Ev */
14279 delta
= dis_bs_E_G ( sorb
, sz
, delta
+ 1, False
);
14282 goto decode_failure
;
14286 case 0xA4: sz
= 1; /* REP MOVS<sz> fallthrough */
14288 dis_REP_op ( &dres
, X86CondAlways
, dis_MOVS
, sz
, eip_orig
,
14289 guest_EIP_bbstart
+delta
, "rep movs" );
14292 case 0xA6: sz
= 1; /* REPE CMP<sz> fallthrough */
14294 dis_REP_op ( &dres
, X86CondZ
, dis_CMPS
, sz
, eip_orig
,
14295 guest_EIP_bbstart
+delta
, "repe cmps" );
14298 case 0xAA: sz
= 1; /* REP STOS<sz> fallthrough */
14300 dis_REP_op ( &dres
, X86CondAlways
, dis_STOS
, sz
, eip_orig
,
14301 guest_EIP_bbstart
+delta
, "rep stos" );
14304 case 0xAC: sz
= 1; /* REP LODS<sz> fallthrough */
14306 dis_REP_op ( &dres
, X86CondAlways
, dis_LODS
, sz
, eip_orig
,
14307 guest_EIP_bbstart
+delta
, "rep lods" );
14310 case 0xAE: sz
= 1; /* REPE SCAS<sz> fallthrough */
14312 dis_REP_op ( &dres
, X86CondZ
, dis_SCAS
, sz
, eip_orig
,
14313 guest_EIP_bbstart
+delta
, "repe scas" );
14316 case 0x90: /* REP NOP (PAUSE) */
14317 /* a hint to the P4 re spin-wait loop */
14318 DIP("rep nop (P4 pause)\n");
14319 /* "observe" the hint. The Vex client needs to be careful not
14320 to cause very long delays as a result, though. */
14321 jmp_lit(&dres
, Ijk_Yield
, ((Addr32
)guest_EIP_bbstart
)+delta
);
14322 vassert(dres
.whatNext
== Dis_StopHere
);
14325 case 0xC3: /* REP RET -- same as normal ret? */
14331 goto decode_failure
;
14336 /* ------------------------ XCHG ----------------------- */
14338 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
14339 prefix; hence it must be translated with an IRCAS (at least, the
14340 memory variant). */
14341 case 0x86: /* XCHG Gb,Eb */
14343 /* Fall through ... */
14344 case 0x87: /* XCHG Gv,Ev */
14345 modrm
= getIByte(delta
);
14347 t1
= newTemp(ty
); t2
= newTemp(ty
);
14348 if (epartIsReg(modrm
)) {
14349 assign(t1
, getIReg(sz
, eregOfRM(modrm
)));
14350 assign(t2
, getIReg(sz
, gregOfRM(modrm
)));
14351 putIReg(sz
, gregOfRM(modrm
), mkexpr(t1
));
14352 putIReg(sz
, eregOfRM(modrm
), mkexpr(t2
));
14354 DIP("xchg%c %s, %s\n",
14355 nameISize(sz
), nameIReg(sz
,gregOfRM(modrm
)),
14356 nameIReg(sz
,eregOfRM(modrm
)));
14358 *expect_CAS
= True
;
14359 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
14360 assign( t1
, loadLE(ty
,mkexpr(addr
)) );
14361 assign( t2
, getIReg(sz
,gregOfRM(modrm
)) );
14362 casLE( mkexpr(addr
),
14363 mkexpr(t1
), mkexpr(t2
), guest_EIP_curr_instr
);
14364 putIReg( sz
, gregOfRM(modrm
), mkexpr(t1
) );
14366 DIP("xchg%c %s, %s\n", nameISize(sz
),
14367 nameIReg(sz
,gregOfRM(modrm
)), dis_buf
);
14371 case 0x90: /* XCHG eAX,eAX */
14374 case 0x91: /* XCHG eAX,eCX */
14375 case 0x92: /* XCHG eAX,eDX */
14376 case 0x93: /* XCHG eAX,eBX */
14377 case 0x94: /* XCHG eAX,eSP */
14378 case 0x95: /* XCHG eAX,eBP */
14379 case 0x96: /* XCHG eAX,eSI */
14380 case 0x97: /* XCHG eAX,eDI */
14381 codegen_xchg_eAX_Reg ( sz
, opc
- 0x90 );
14384 /* ------------------------ XLAT ----------------------- */
14386 case 0xD7: /* XLAT */
14387 if (sz
!= 4) goto decode_failure
; /* sz == 2 is also allowed (0x66) */
14396 unop(Iop_8Uto32
, getIReg(1, R_EAX
/*AL*/))))));
14398 DIP("xlat%c [ebx]\n", nameISize(sz
));
14401 /* ------------------------ IN / OUT ----------------------- */
14403 case 0xE4: /* IN imm8, AL */
14405 t1
= newTemp(Ity_I32
);
14406 abyte
= getIByte(delta
); delta
++;
14407 assign(t1
, mkU32( abyte
& 0xFF ));
14408 DIP("in%c $%d,%s\n", nameISize(sz
), abyte
, nameIReg(sz
,R_EAX
));
14410 case 0xE5: /* IN imm8, eAX */
14411 vassert(sz
== 2 || sz
== 4);
14412 t1
= newTemp(Ity_I32
);
14413 abyte
= getIByte(delta
); delta
++;
14414 assign(t1
, mkU32( abyte
& 0xFF ));
14415 DIP("in%c $%d,%s\n", nameISize(sz
), abyte
, nameIReg(sz
,R_EAX
));
14417 case 0xEC: /* IN %DX, AL */
14419 t1
= newTemp(Ity_I32
);
14420 assign(t1
, unop(Iop_16Uto32
, getIReg(2, R_EDX
)));
14421 DIP("in%c %s,%s\n", nameISize(sz
), nameIReg(2,R_EDX
),
14422 nameIReg(sz
,R_EAX
));
14424 case 0xED: /* IN %DX, eAX */
14425 vassert(sz
== 2 || sz
== 4);
14426 t1
= newTemp(Ity_I32
);
14427 assign(t1
, unop(Iop_16Uto32
, getIReg(2, R_EDX
)));
14428 DIP("in%c %s,%s\n", nameISize(sz
), nameIReg(2,R_EDX
),
14429 nameIReg(sz
,R_EAX
));
14432 /* At this point, sz indicates the width, and t1 is a 32-bit
14433 value giving port number. */
14435 vassert(sz
== 1 || sz
== 2 || sz
== 4);
14437 t2
= newTemp(Ity_I32
);
14438 d
= unsafeIRDirty_1_N(
14441 "x86g_dirtyhelper_IN",
14442 &x86g_dirtyhelper_IN
,
14443 mkIRExprVec_2( mkexpr(t1
), mkU32(sz
) )
14445 /* do the call, dumping the result in t2. */
14446 stmt( IRStmt_Dirty(d
) );
14447 putIReg(sz
, R_EAX
, narrowTo( ty
, mkexpr(t2
) ) );
14451 case 0xE6: /* OUT AL, imm8 */
14453 t1
= newTemp(Ity_I32
);
14454 abyte
= getIByte(delta
); delta
++;
14455 assign( t1
, mkU32( abyte
& 0xFF ) );
14456 DIP("out%c %s,$%d\n", nameISize(sz
), nameIReg(sz
,R_EAX
), abyte
);
14458 case 0xE7: /* OUT eAX, imm8 */
14459 vassert(sz
== 2 || sz
== 4);
14460 t1
= newTemp(Ity_I32
);
14461 abyte
= getIByte(delta
); delta
++;
14462 assign( t1
, mkU32( abyte
& 0xFF ) );
14463 DIP("out%c %s,$%d\n", nameISize(sz
), nameIReg(sz
,R_EAX
), abyte
);
14465 case 0xEE: /* OUT AL, %DX */
14467 t1
= newTemp(Ity_I32
);
14468 assign( t1
, unop(Iop_16Uto32
, getIReg(2, R_EDX
)) );
14469 DIP("out%c %s,%s\n", nameISize(sz
), nameIReg(sz
,R_EAX
),
14470 nameIReg(2,R_EDX
));
14472 case 0xEF: /* OUT eAX, %DX */
14473 vassert(sz
== 2 || sz
== 4);
14474 t1
= newTemp(Ity_I32
);
14475 assign( t1
, unop(Iop_16Uto32
, getIReg(2, R_EDX
)) );
14476 DIP("out%c %s,%s\n", nameISize(sz
), nameIReg(sz
,R_EAX
),
14477 nameIReg(2,R_EDX
));
14480 /* At this point, sz indicates the width, and t1 is a 32-bit
14481 value giving port number. */
14483 vassert(sz
== 1 || sz
== 2 || sz
== 4);
14485 d
= unsafeIRDirty_0_N(
14487 "x86g_dirtyhelper_OUT",
14488 &x86g_dirtyhelper_OUT
,
14489 mkIRExprVec_3( mkexpr(t1
),
14490 widenUto32( getIReg(sz
, R_EAX
) ),
14493 stmt( IRStmt_Dirty(d
) );
14497 /* ------------------------ (Grp1 extensions) ---------- */
14499 case 0x82: /* Grp1 Ib,Eb too. Apparently this is the same as
14500 case 0x80, but only in 32-bit mode. */
14502 case 0x80: /* Grp1 Ib,Eb */
14503 modrm
= getIByte(delta
);
14504 am_sz
= lengthAMode(delta
);
14507 d32
= getUChar(delta
+ am_sz
);
14508 delta
= dis_Grp1 ( sorb
, pfx_lock
, delta
, modrm
, am_sz
, d_sz
, sz
, d32
);
14511 case 0x81: /* Grp1 Iv,Ev */
14512 modrm
= getIByte(delta
);
14513 am_sz
= lengthAMode(delta
);
14515 d32
= getUDisp(d_sz
, delta
+ am_sz
);
14516 delta
= dis_Grp1 ( sorb
, pfx_lock
, delta
, modrm
, am_sz
, d_sz
, sz
, d32
);
14519 case 0x83: /* Grp1 Ib,Ev */
14520 modrm
= getIByte(delta
);
14521 am_sz
= lengthAMode(delta
);
14523 d32
= getSDisp8(delta
+ am_sz
);
14524 delta
= dis_Grp1 ( sorb
, pfx_lock
, delta
, modrm
, am_sz
, d_sz
, sz
, d32
);
14527 /* ------------------------ (Grp2 extensions) ---------- */
14529 case 0xC0: { /* Grp2 Ib,Eb */
14530 Bool decode_OK
= True
;
14531 modrm
= getIByte(delta
);
14532 am_sz
= lengthAMode(delta
);
14534 d32
= getUChar(delta
+ am_sz
);
14536 delta
= dis_Grp2 ( sorb
, delta
, modrm
, am_sz
, d_sz
, sz
,
14537 mkU8(d32
& 0xFF), NULL
, &decode_OK
);
14539 goto decode_failure
;
14542 case 0xC1: { /* Grp2 Ib,Ev */
14543 Bool decode_OK
= True
;
14544 modrm
= getIByte(delta
);
14545 am_sz
= lengthAMode(delta
);
14547 d32
= getUChar(delta
+ am_sz
);
14548 delta
= dis_Grp2 ( sorb
, delta
, modrm
, am_sz
, d_sz
, sz
,
14549 mkU8(d32
& 0xFF), NULL
, &decode_OK
);
14551 goto decode_failure
;
14554 case 0xD0: { /* Grp2 1,Eb */
14555 Bool decode_OK
= True
;
14556 modrm
= getIByte(delta
);
14557 am_sz
= lengthAMode(delta
);
14561 delta
= dis_Grp2 ( sorb
, delta
, modrm
, am_sz
, d_sz
, sz
,
14562 mkU8(d32
), NULL
, &decode_OK
);
14564 goto decode_failure
;
14567 case 0xD1: { /* Grp2 1,Ev */
14568 Bool decode_OK
= True
;
14569 modrm
= getUChar(delta
);
14570 am_sz
= lengthAMode(delta
);
14573 delta
= dis_Grp2 ( sorb
, delta
, modrm
, am_sz
, d_sz
, sz
,
14574 mkU8(d32
), NULL
, &decode_OK
);
14576 goto decode_failure
;
14579 case 0xD2: { /* Grp2 CL,Eb */
14580 Bool decode_OK
= True
;
14581 modrm
= getUChar(delta
);
14582 am_sz
= lengthAMode(delta
);
14585 delta
= dis_Grp2 ( sorb
, delta
, modrm
, am_sz
, d_sz
, sz
,
14586 getIReg(1,R_ECX
), "%cl", &decode_OK
);
14588 goto decode_failure
;
14591 case 0xD3: { /* Grp2 CL,Ev */
14592 Bool decode_OK
= True
;
14593 modrm
= getIByte(delta
);
14594 am_sz
= lengthAMode(delta
);
14596 delta
= dis_Grp2 ( sorb
, delta
, modrm
, am_sz
, d_sz
, sz
,
14597 getIReg(1,R_ECX
), "%cl", &decode_OK
);
14599 goto decode_failure
;
14603 /* ------------------------ (Grp3 extensions) ---------- */
14605 case 0xF6: { /* Grp3 Eb */
14606 Bool decode_OK
= True
;
14607 delta
= dis_Grp3 ( sorb
, pfx_lock
, 1, delta
, &decode_OK
);
14609 goto decode_failure
;
14612 case 0xF7: { /* Grp3 Ev */
14613 Bool decode_OK
= True
;
14614 delta
= dis_Grp3 ( sorb
, pfx_lock
, sz
, delta
, &decode_OK
);
14616 goto decode_failure
;
14620 /* ------------------------ (Grp4 extensions) ---------- */
14622 case 0xFE: { /* Grp4 Eb */
14623 Bool decode_OK
= True
;
14624 delta
= dis_Grp4 ( sorb
, pfx_lock
, delta
, &decode_OK
);
14626 goto decode_failure
;
14630 /* ------------------------ (Grp5 extensions) ---------- */
14632 case 0xFF: { /* Grp5 Ev */
14633 Bool decode_OK
= True
;
14634 delta
= dis_Grp5 ( sorb
, pfx_lock
, sz
, delta
, &dres
, &decode_OK
);
14636 goto decode_failure
;
14640 /* ------------------------ Escapes to 2-byte opcodes -- */
14643 opc
= getIByte(delta
); delta
++;
14646 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */
14648 case 0xBA: { /* Grp8 Ib,Ev */
14649 Bool decode_OK
= False
;
14650 modrm
= getUChar(delta
);
14651 am_sz
= lengthAMode(delta
);
14652 d32
= getSDisp8(delta
+ am_sz
);
14653 delta
= dis_Grp8_Imm ( sorb
, pfx_lock
, delta
, modrm
,
14654 am_sz
, sz
, d32
, &decode_OK
);
14656 goto decode_failure
;
14660 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */
14662 case 0xBC: /* BSF Gv,Ev */
14663 delta
= dis_bs_E_G ( sorb
, sz
, delta
, True
);
14665 case 0xBD: /* BSR Gv,Ev */
14666 delta
= dis_bs_E_G ( sorb
, sz
, delta
, False
);
14669 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */
14671 case 0xC8: /* BSWAP %eax */
14678 case 0xCF: /* BSWAP %edi */
14679 /* AFAICS from the Intel docs, this only exists at size 4. */
14680 if (sz
!= 4) goto decode_failure
;
14682 t1
= newTemp(Ity_I32
);
14683 assign( t1
, getIReg(4, opc
-0xC8) );
14684 t2
= math_BSWAP(t1
, Ity_I32
);
14686 putIReg(4, opc
-0xC8, mkexpr(t2
));
14687 DIP("bswapl %s\n", nameIReg(4, opc
-0xC8));
14690 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
14692 case 0xA3: /* BT Gv,Ev */
14693 delta
= dis_bt_G_E ( vbi
, sorb
, pfx_lock
, sz
, delta
, BtOpNone
);
14695 case 0xB3: /* BTR Gv,Ev */
14696 delta
= dis_bt_G_E ( vbi
, sorb
, pfx_lock
, sz
, delta
, BtOpReset
);
14698 case 0xAB: /* BTS Gv,Ev */
14699 delta
= dis_bt_G_E ( vbi
, sorb
, pfx_lock
, sz
, delta
, BtOpSet
);
14701 case 0xBB: /* BTC Gv,Ev */
14702 delta
= dis_bt_G_E ( vbi
, sorb
, pfx_lock
, sz
, delta
, BtOpComp
);
14705 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
14709 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
14710 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
14711 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
14712 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
14713 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
14714 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
14715 case 0x48: /* CMOVSb (cmov negative) */
14716 case 0x49: /* CMOVSb (cmov not negative) */
14717 case 0x4A: /* CMOVP (cmov parity even) */
14718 case 0x4B: /* CMOVNP (cmov parity odd) */
14719 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
14720 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
14721 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
14722 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
14723 delta
= dis_cmov_E_G(sorb
, sz
, (X86Condcode
)(opc
- 0x40), delta
);
14726 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
14728 case 0xB0: /* CMPXCHG Gb,Eb */
14729 delta
= dis_cmpxchg_G_E ( sorb
, pfx_lock
, 1, delta
);
14731 case 0xB1: /* CMPXCHG Gv,Ev */
14732 delta
= dis_cmpxchg_G_E ( sorb
, pfx_lock
, sz
, delta
);
14735 case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */
14736 IRTemp expdHi
= newTemp(Ity_I32
);
14737 IRTemp expdLo
= newTemp(Ity_I32
);
14738 IRTemp dataHi
= newTemp(Ity_I32
);
14739 IRTemp dataLo
= newTemp(Ity_I32
);
14740 IRTemp oldHi
= newTemp(Ity_I32
);
14741 IRTemp oldLo
= newTemp(Ity_I32
);
14742 IRTemp flags_old
= newTemp(Ity_I32
);
14743 IRTemp flags_new
= newTemp(Ity_I32
);
14744 IRTemp success
= newTemp(Ity_I1
);
14746 /* Translate this using a DCAS, even if there is no LOCK
14747 prefix. Life is too short to bother with generating two
14748 different translations for the with/without-LOCK-prefix
14750 *expect_CAS
= True
;
14752 /* Decode, and generate address. */
14753 if (sz
!= 4) goto decode_failure
;
14754 modrm
= getIByte(delta
);
14755 if (epartIsReg(modrm
)) goto decode_failure
;
14756 if (gregOfRM(modrm
) != 1) goto decode_failure
;
14757 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
14760 /* Get the expected and new values. */
14761 assign( expdHi
, getIReg(4,R_EDX
) );
14762 assign( expdLo
, getIReg(4,R_EAX
) );
14763 assign( dataHi
, getIReg(4,R_ECX
) );
14764 assign( dataLo
, getIReg(4,R_EBX
) );
14768 mkIRCAS( oldHi
, oldLo
,
14769 Iend_LE
, mkexpr(addr
),
14770 mkexpr(expdHi
), mkexpr(expdLo
),
14771 mkexpr(dataHi
), mkexpr(dataLo
)
14774 /* success when oldHi:oldLo == expdHi:expdLo */
14776 binop(Iop_CasCmpEQ32
,
14778 binop(Iop_Xor32
, mkexpr(oldHi
), mkexpr(expdHi
)),
14779 binop(Iop_Xor32
, mkexpr(oldLo
), mkexpr(expdLo
))
14784 /* If the DCAS is successful, that is to say oldHi:oldLo ==
14785 expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX,
14786 which is where they came from originally. Both the actual
14787 contents of these two regs, and any shadow values, are
14788 unchanged. If the DCAS fails then we're putting into
14789 EDX:EAX the value seen in memory. */
14791 IRExpr_ITE( mkexpr(success
),
14792 mkexpr(expdHi
), mkexpr(oldHi
)
14795 IRExpr_ITE( mkexpr(success
),
14796 mkexpr(expdLo
), mkexpr(oldLo
)
14799 /* Copy the success bit into the Z flag and leave the others
14801 assign( flags_old
, widenUto32(mk_x86g_calculate_eflags_all()));
14805 binop(Iop_And32
, mkexpr(flags_old
),
14806 mkU32(~X86G_CC_MASK_Z
)),
14809 unop(Iop_1Uto32
, mkexpr(success
)), mkU32(1)),
14810 mkU8(X86G_CC_SHIFT_Z
)) ));
14812 stmt( IRStmt_Put( OFFB_CC_OP
, mkU32(X86G_CC_OP_COPY
) ));
14813 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(flags_new
) ));
14814 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU32(0) ));
14815 /* Set NDEP even though it isn't used. This makes
14816 redundant-PUT elimination of previous stores to this field
14818 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU32(0) ));
14820 /* Sheesh. Aren't you glad it was me and not you that had to
14821 write and validate all this grunge? */
14823 DIP("cmpxchg8b %s\n", dis_buf
);
14827 /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
14829 case 0xA2: { /* CPUID */
14830 /* Uses dirty helper:
14831 void dirtyhelper_CPUID_sse[012] ( VexGuestX86State* )
14832 declared to mod eax, wr ebx, ecx, edx
14835 void* fAddr
= NULL
;
14836 const HChar
* fName
= NULL
;
14837 if (archinfo
->hwcaps
& VEX_HWCAPS_X86_SSE3
) {
14838 fName
= "x86g_dirtyhelper_CPUID_sse3";
14839 fAddr
= &x86g_dirtyhelper_CPUID_sse3
;
14842 if (archinfo
->hwcaps
& VEX_HWCAPS_X86_SSE2
) {
14843 fName
= "x86g_dirtyhelper_CPUID_sse2";
14844 fAddr
= &x86g_dirtyhelper_CPUID_sse2
;
14847 if (archinfo
->hwcaps
& VEX_HWCAPS_X86_SSE1
) {
14848 fName
= "x86g_dirtyhelper_CPUID_sse1";
14849 fAddr
= &x86g_dirtyhelper_CPUID_sse1
;
14852 if (archinfo
->hwcaps
& VEX_HWCAPS_X86_MMXEXT
) {
14853 fName
= "x86g_dirtyhelper_CPUID_mmxext";
14854 fAddr
= &x86g_dirtyhelper_CPUID_mmxext
;
14857 if (archinfo
->hwcaps
== 0/*no SSE*/) {
14858 fName
= "x86g_dirtyhelper_CPUID_sse0";
14859 fAddr
= &x86g_dirtyhelper_CPUID_sse0
;
14861 vpanic("disInstr(x86)(cpuid)");
14863 vassert(fName
); vassert(fAddr
);
14864 d
= unsafeIRDirty_0_N ( 0/*regparms*/,
14865 fName
, fAddr
, mkIRExprVec_1(IRExpr_GSPTR()) );
14866 /* declare guest state effects */
14868 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
14869 d
->fxState
[0].fx
= Ifx_Modify
;
14870 d
->fxState
[0].offset
= OFFB_EAX
;
14871 d
->fxState
[0].size
= 4;
14872 d
->fxState
[1].fx
= Ifx_Write
;
14873 d
->fxState
[1].offset
= OFFB_EBX
;
14874 d
->fxState
[1].size
= 4;
14875 d
->fxState
[2].fx
= Ifx_Modify
;
14876 d
->fxState
[2].offset
= OFFB_ECX
;
14877 d
->fxState
[2].size
= 4;
14878 d
->fxState
[3].fx
= Ifx_Write
;
14879 d
->fxState
[3].offset
= OFFB_EDX
;
14880 d
->fxState
[3].size
= 4;
14881 /* execute the dirty call, side-effecting guest state */
14882 stmt( IRStmt_Dirty(d
) );
14883 /* CPUID is a serialising insn. So, just in case someone is
14884 using it as a memory fence ... */
14885 stmt( IRStmt_MBE(Imbe_Fence
) );
14890 //-- if (!VG_(cpu_has_feature)(VG_X86_FEAT_CPUID))
14891 //-- goto decode_failure;
14893 //-- t1 = newTemp(cb);
14894 //-- t2 = newTemp(cb);
14895 //-- t3 = newTemp(cb);
14896 //-- t4 = newTemp(cb);
14897 //-- uInstr0(cb, CALLM_S, 0);
14899 //-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t1);
14900 //-- uInstr1(cb, PUSH, 4, TempReg, t1);
14902 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2);
14903 //-- uLiteral(cb, 0);
14904 //-- uInstr1(cb, PUSH, 4, TempReg, t2);
14906 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t3);
14907 //-- uLiteral(cb, 0);
14908 //-- uInstr1(cb, PUSH, 4, TempReg, t3);
14910 //-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4);
14911 //-- uLiteral(cb, 0);
14912 //-- uInstr1(cb, PUSH, 4, TempReg, t4);
14914 //-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CPUID));
14915 //-- uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty);
14917 //-- uInstr1(cb, POP, 4, TempReg, t4);
14918 //-- uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_EDX);
14920 //-- uInstr1(cb, POP, 4, TempReg, t3);
14921 //-- uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ECX);
14923 //-- uInstr1(cb, POP, 4, TempReg, t2);
14924 //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBX);
14926 //-- uInstr1(cb, POP, 4, TempReg, t1);
14927 //-- uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX);
14929 //-- uInstr0(cb, CALLM_E, 0);
14930 //-- DIP("cpuid\n");
14933 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */
14935 case 0xB6: /* MOVZXb Eb,Gv */
14936 if (sz
!= 2 && sz
!= 4)
14937 goto decode_failure
;
14938 delta
= dis_movx_E_G ( sorb
, delta
, 1, sz
, False
);
14941 case 0xB7: /* MOVZXw Ew,Gv */
14943 goto decode_failure
;
14944 delta
= dis_movx_E_G ( sorb
, delta
, 2, 4, False
);
14947 case 0xBE: /* MOVSXb Eb,Gv */
14948 if (sz
!= 2 && sz
!= 4)
14949 goto decode_failure
;
14950 delta
= dis_movx_E_G ( sorb
, delta
, 1, sz
, True
);
14953 case 0xBF: /* MOVSXw Ew,Gv */
14954 if (sz
!= 4 && /* accept movsww, sigh, see #250799 */sz
!= 2)
14955 goto decode_failure
;
14956 delta
= dis_movx_E_G ( sorb
, delta
, 2, sz
, True
);
14959 //-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
14961 //-- case 0xC3: /* MOVNTI Gv,Ev */
14962 //-- vg_assert(sz == 4);
14963 //-- modrm = getUChar(eip);
14964 //-- vg_assert(!epartIsReg(modrm));
14965 //-- t1 = newTemp(cb);
14966 //-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
14967 //-- pair = disAMode ( cb, sorb, eip, dis_buf );
14968 //-- t2 = LOW24(pair);
14969 //-- eip += HI8(pair);
14970 //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
14971 //-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
14974 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
14976 case 0xAF: /* IMUL Ev, Gv */
14977 delta
= dis_mul_E_G ( sorb
, sz
, delta
);
14980 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */
14983 modrm
= getUChar(delta
);
14984 if (epartIsReg(modrm
)) goto decode_failure
;
14985 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
14987 DIP("nop%c %s\n", nameISize(sz
), dis_buf
);
14990 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
14993 case 0x82: /* JBb/JNAEb (jump below) */
14994 case 0x83: /* JNBb/JAEb (jump not below) */
14995 case 0x84: /* JZb/JEb (jump zero) */
14996 case 0x85: /* JNZb/JNEb (jump not zero) */
14997 case 0x86: /* JBEb/JNAb (jump below or equal) */
14998 case 0x87: /* JNBEb/JAb (jump not below or equal) */
14999 case 0x88: /* JSb (jump negative) */
15000 case 0x89: /* JSb (jump not negative) */
15001 case 0x8A: /* JP (jump parity even) */
15002 case 0x8B: /* JNP/JPO (jump parity odd) */
15003 case 0x8C: /* JLb/JNGEb (jump less) */
15004 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
15005 case 0x8E: /* JLEb/JNGb (jump less or equal) */
15006 case 0x8F: /* JGb/JNLEb (jump greater) */
15008 const HChar
* comment
= "";
15009 jmpDelta
= (Int
)getUDisp32(delta
);
15010 d32
= (((Addr32
)guest_EIP_bbstart
)+delta
+4) + jmpDelta
;
15012 /* End the block at this point. */
15013 jcc_01( &dres
, (X86Condcode
)(opc
- 0x80),
15014 (Addr32
)(guest_EIP_bbstart
+delta
), d32
);
15015 vassert(dres
.whatNext
== Dis_StopHere
);
15016 DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc
- 0x80), d32
, comment
);
15020 /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
15021 case 0x31: { /* RDTSC */
15022 IRTemp val
= newTemp(Ity_I64
);
15023 IRExpr
** args
= mkIRExprVec_0();
15024 IRDirty
* d
= unsafeIRDirty_1_N (
15027 "x86g_dirtyhelper_RDTSC",
15028 &x86g_dirtyhelper_RDTSC
,
15031 /* execute the dirty call, dumping the result in val. */
15032 stmt( IRStmt_Dirty(d
) );
15033 putIReg(4, R_EDX
, unop(Iop_64HIto32
, mkexpr(val
)));
15034 putIReg(4, R_EAX
, unop(Iop_64to32
, mkexpr(val
)));
15039 /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */
15041 case 0xA1: /* POP %FS */
15042 dis_pop_segreg( R_FS
, sz
); break;
15043 case 0xA9: /* POP %GS */
15044 dis_pop_segreg( R_GS
, sz
); break;
15046 case 0xA0: /* PUSH %FS */
15047 dis_push_segreg( R_FS
, sz
); break;
15048 case 0xA8: /* PUSH %GS */
15049 dis_push_segreg( R_GS
, sz
); break;
15051 /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
15054 case 0x92: /* set-Bb/set-NAEb (jump below) */
15055 case 0x93: /* set-NBb/set-AEb (jump not below) */
15056 case 0x94: /* set-Zb/set-Eb (jump zero) */
15057 case 0x95: /* set-NZb/set-NEb (jump not zero) */
15058 case 0x96: /* set-BEb/set-NAb (jump below or equal) */
15059 case 0x97: /* set-NBEb/set-Ab (jump not below or equal) */
15060 case 0x98: /* set-Sb (jump negative) */
15061 case 0x99: /* set-Sb (jump not negative) */
15062 case 0x9A: /* set-P (jump parity even) */
15063 case 0x9B: /* set-NP (jump parity odd) */
15064 case 0x9C: /* set-Lb/set-NGEb (jump less) */
15065 case 0x9D: /* set-GEb/set-NLb (jump greater or equal) */
15066 case 0x9E: /* set-LEb/set-NGb (jump less or equal) */
15067 case 0x9F: /* set-Gb/set-NLEb (jump greater) */
15068 t1
= newTemp(Ity_I8
);
15069 assign( t1
, unop(Iop_1Uto8
,mk_x86g_calculate_condition(opc
-0x90)) );
15070 modrm
= getIByte(delta
);
15071 if (epartIsReg(modrm
)) {
15073 putIReg(1, eregOfRM(modrm
), mkexpr(t1
));
15074 DIP("set%s %s\n", name_X86Condcode(opc
-0x90),
15075 nameIReg(1,eregOfRM(modrm
)));
15077 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
15079 storeLE( mkexpr(addr
), mkexpr(t1
) );
15080 DIP("set%s %s\n", name_X86Condcode(opc
-0x90), dis_buf
);
15084 /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
15086 case 0xA4: /* SHLDv imm8,Gv,Ev */
15087 modrm
= getIByte(delta
);
15088 d32
= delta
+ lengthAMode(delta
);
15089 vex_sprintf(dis_buf
, "$%d", getIByte(d32
));
15090 delta
= dis_SHLRD_Gv_Ev (
15091 sorb
, delta
, modrm
, sz
,
15092 mkU8(getIByte(d32
)), True
, /* literal */
15095 case 0xA5: /* SHLDv %cl,Gv,Ev */
15096 modrm
= getIByte(delta
);
15097 delta
= dis_SHLRD_Gv_Ev (
15098 sorb
, delta
, modrm
, sz
,
15099 getIReg(1,R_ECX
), False
, /* not literal */
15103 case 0xAC: /* SHRDv imm8,Gv,Ev */
15104 modrm
= getIByte(delta
);
15105 d32
= delta
+ lengthAMode(delta
);
15106 vex_sprintf(dis_buf
, "$%d", getIByte(d32
));
15107 delta
= dis_SHLRD_Gv_Ev (
15108 sorb
, delta
, modrm
, sz
,
15109 mkU8(getIByte(d32
)), True
, /* literal */
15112 case 0xAD: /* SHRDv %cl,Gv,Ev */
15113 modrm
= getIByte(delta
);
15114 delta
= dis_SHLRD_Gv_Ev (
15115 sorb
, delta
, modrm
, sz
,
15116 getIReg(1,R_ECX
), False
, /* not literal */
15120 /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */
15123 /* Simple implementation needing a long explanation.
15125 sysenter is a kind of syscall entry. The key thing here
15126 is that the return address is not known -- that is
15127 something that is beyond Vex's knowledge. So this IR
15128 forces a return to the scheduler, which can do what it
15129 likes to simulate the sysenter, but it MUST set this
15130 thread's guest_EIP field with the continuation address
15131 before resuming execution. If that doesn't happen, the
15132 thread will jump to address zero, which is probably
15136 /* Note where we are, so we can back up the guest to this
15137 point if the syscall needs to be restarted. */
15138 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL
,
15139 mkU32(guest_EIP_curr_instr
) ) );
15140 jmp_lit(&dres
, Ijk_Sys_sysenter
, 0/*bogus next EIP value*/);
15141 vassert(dres
.whatNext
== Dis_StopHere
);
15145 /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */
15147 case 0xC0: { /* XADD Gb,Eb */
15149 delta
= dis_xadd_G_E ( sorb
, pfx_lock
, 1, delta
, &decodeOK
);
15150 if (!decodeOK
) goto decode_failure
;
15153 case 0xC1: { /* XADD Gv,Ev */
15155 delta
= dis_xadd_G_E ( sorb
, pfx_lock
, sz
, delta
, &decodeOK
);
15156 if (!decodeOK
) goto decode_failure
;
15160 /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */
15164 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
15166 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
15167 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
15168 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
15169 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
15173 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
15176 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
15179 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
15183 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
15186 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
15189 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
15191 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
15192 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
15194 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
15198 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
15202 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
15204 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
15205 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
15206 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
15210 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
15214 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
15216 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
15217 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
15218 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
15219 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
15221 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
15225 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
15229 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
15232 Int delta0
= delta
-1;
15233 Bool decode_OK
= False
;
15235 /* If sz==2 this is SSE, and we assume sse idec has
15236 already spotted those cases by now. */
15238 goto decode_failure
;
15240 delta
= dis_MMX ( &decode_OK
, sorb
, sz
, delta
-1 );
15243 goto decode_failure
;
15248 case 0x0E: /* FEMMS */
15249 case 0x77: /* EMMS */
15251 goto decode_failure
;
15252 do_EMMS_preamble();
15256 /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
15257 case 0x01: /* 0F 01 /0 -- SGDT */
15258 /* 0F 01 /1 -- SIDT */
15260 /* This is really revolting, but ... since each processor
15261 (core) only has one IDT and one GDT, just let the guest
15262 see it (pass-through semantics). I can't see any way to
15263 construct a faked-up value, so don't bother to try. */
15264 modrm
= getUChar(delta
);
15265 if (epartIsReg(modrm
)) goto decode_failure
;
15266 if (gregOfRM(modrm
) != 0 && gregOfRM(modrm
) != 1)
15267 goto decode_failure
;
15268 addr
= disAMode ( &alen
, sorb
, delta
, dis_buf
);
15270 switch (gregOfRM(modrm
)) {
15271 case 0: DIP("sgdt %s\n", dis_buf
); break;
15272 case 1: DIP("sidt %s\n", dis_buf
); break;
15273 default: vassert(0); /*NOTREACHED*/
15276 IRDirty
* d
= unsafeIRDirty_0_N (
15278 "x86g_dirtyhelper_SxDT",
15279 &x86g_dirtyhelper_SxDT
,
15280 mkIRExprVec_2( mkexpr(addr
),
15281 mkU32(gregOfRM(modrm
)) )
15283 /* declare we're writing memory */
15284 d
->mFx
= Ifx_Write
;
15285 d
->mAddr
= mkexpr(addr
);
15287 stmt( IRStmt_Dirty(d
) );
15291 case 0x05: /* AMD's syscall */
15292 stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL
,
15293 mkU32(guest_EIP_curr_instr
) ) );
15294 jmp_lit(&dres
, Ijk_Sys_syscall
, ((Addr32
)guest_EIP_bbstart
)+delta
);
15295 vassert(dres
.whatNext
== Dis_StopHere
);
15299 /* =-=-=-=-=-=-=-=-=-=- UD2 =-=-=-=-=-=-=-=-=-=-=-= */
15301 case 0x0B: /* UD2 */
15302 stmt( IRStmt_Put( OFFB_EIP
, mkU32(guest_EIP_curr_instr
) ) );
15303 jmp_lit(&dres
, Ijk_NoDecode
, guest_EIP_curr_instr
);
15304 vassert(dres
.whatNext
== Dis_StopHere
);
15308 /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */
15311 goto decode_failure
;
15312 } /* switch (opc) for the 2-byte opcodes */
15313 goto decode_success
;
15314 } /* case 0x0F: of primary opcode */
15316 /* ------------------------ ??? ------------------------ */
15320 /* All decode failures end up here. */
15322 vex_printf("vex x86->IR: unhandled instruction bytes: "
15323 "0x%x 0x%x 0x%x 0x%x\n",
15324 getIByte(delta_start
+0),
15325 getIByte(delta_start
+1),
15326 getIByte(delta_start
+2),
15327 getIByte(delta_start
+3));
15330 /* Tell the dispatcher that this insn cannot be decoded, and so has
15331 not been executed, and (is currently) the next to be executed.
15332 EIP should be up-to-date since it made so at the start of each
15333 insn, but nevertheless be paranoid and update it again right
15335 stmt( IRStmt_Put( OFFB_EIP
, mkU32(guest_EIP_curr_instr
) ) );
15336 jmp_lit(&dres
, Ijk_NoDecode
, guest_EIP_curr_instr
);
15337 vassert(dres
.whatNext
== Dis_StopHere
);
15339 /* We also need to say that a CAS is not expected now, regardless
15340 of what it might have been set to at the start of the function,
15341 since the IR that we've emitted just above (to synthesis a
15342 SIGILL) does not involve any CAS, and presumably no other IR has
15343 been emitted for this (non-decoded) insn. */
15344 *expect_CAS
= False
;
15347 } /* switch (opc) for the main (primary) opcode switch. */
15350 /* All decode successes end up here. */
15351 switch (dres
.whatNext
) {
15353 stmt( IRStmt_Put( OFFB_EIP
, mkU32(guest_EIP_bbstart
+ delta
) ) );
15362 dres
.len
= delta
- delta_start
;
15370 /*------------------------------------------------------------*/
15371 /*--- Top-level fn ---*/
15372 /*------------------------------------------------------------*/
15374 /* Disassemble a single instruction into IR. The instruction
15375 is located in host memory at &guest_code[delta]. */
15377 DisResult
disInstr_X86 ( IRSB
* irsb_IN
,
15378 const UChar
* guest_code_IN
,
15381 VexArch guest_arch
,
15382 const VexArchInfo
* archinfo
,
15383 const VexAbiInfo
* abiinfo
,
15384 VexEndness host_endness_IN
,
15385 Bool sigill_diag_IN
)
15388 Bool expect_CAS
, has_CAS
;
15391 /* Set globals (see top of this file) */
15392 vassert(guest_arch
== VexArchX86
);
15393 guest_code
= guest_code_IN
;
15395 host_endness
= host_endness_IN
;
15396 guest_EIP_curr_instr
= (Addr32
)guest_IP
;
15397 guest_EIP_bbstart
= (Addr32
)toUInt(guest_IP
- delta
);
15399 x1
= irsb_IN
->stmts_used
;
15400 expect_CAS
= False
;
15401 dres
= disInstr_X86_WRK ( &expect_CAS
,
15402 delta
, archinfo
, abiinfo
, sigill_diag_IN
);
15403 x2
= irsb_IN
->stmts_used
;
15406 /* See comment at the top of disInstr_X86_WRK for meaning of
15407 expect_CAS. Here, we (sanity-)check for the presence/absence of
15408 IRCAS as directed by the returned expect_CAS value. */
15410 for (i
= x1
; i
< x2
; i
++) {
15411 if (irsb_IN
->stmts
[i
]->tag
== Ist_CAS
)
15415 if (expect_CAS
!= has_CAS
) {
15416 /* inconsistency detected. re-disassemble the instruction so as
15417 to generate a useful error message; then assert. */
15418 vex_traceflags
|= VEX_TRACE_FE
;
15419 dres
= disInstr_X86_WRK ( &expect_CAS
,
15420 delta
, archinfo
, abiinfo
, sigill_diag_IN
);
15421 for (i
= x1
; i
< x2
; i
++) {
15422 vex_printf("\t\t");
15423 ppIRStmt(irsb_IN
->stmts
[i
]);
15426 /* Failure of this assertion is serious and denotes a bug in
15428 vpanic("disInstr_X86: inconsistency in LOCK prefix handling");
15435 /*--------------------------------------------------------------------*/
15436 /*--- end guest_x86_toIR.c ---*/
15437 /*--------------------------------------------------------------------*/