2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_amd64_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2004-2017 OpenWorks LLP
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 /* Translates AMD64 code to IR. */
38 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
39 to ensure a 64-bit value is being written.
43 * all arithmetic done at 64 bits
45 * no FP exceptions, except for handling stack over/underflow
47 * FP rounding mode observed only for float->int conversions and
48 int->float conversions which could lose accuracy, and for
49 float-to-float rounding. For all other operations,
50 round-to-nearest is used, regardless.
52 * some of the FCOM cases could do with testing -- not convinced
53 that the args are the right way round.
55 * FSAVE does not re-initialise the FPU; it should do
57 * FINIT not only initialises the FPU environment, it also zeroes
58 all the FP registers. It should leave the registers unchanged.
60 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
61 per Intel docs this bit has no meaning anyway. Since PUSHF is the
62 only way to observe eflags[1], a proper fix would be to make that
65 This module uses global variables and so is not MT-safe (if that
66 should ever become relevant).
69 /* Notes re address size overrides (0x67).
71 According to the AMD documentation (24594 Rev 3.09, Sept 2003,
72 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
73 and System Instructions"), Section 1.2.3 ("Address-Size Override
76 0x67 applies to all explicit memory references, causing the top
77 32 bits of the effective address to become zero.
79 0x67 has no effect on stack references (push/pop); these always
82 0x67 changes the interpretation of instructions which implicitly
83 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
98 /* "Special" instructions.
This instruction decoder can decode four special instructions
101 which mean nothing natively (are no-ops as far as regs/mem are
102 concerned) but have meaning for supporting Valgrind. A special
103 instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
104 48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
105 $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
Following that, one of the following 4 are allowed (standard
interpretation in parentheses):
109 4887DB (xchgq %rbx,%rbx) %RDX = client_request ( %RAX )
110 4887C9 (xchgq %rcx,%rcx) %RAX = guest_NRADDR
111 4887D2 (xchgq %rdx,%rdx) call-noredir *%RAX
112 4887F6 (xchgq %rdi,%rdi) IR injection
114 Any other bytes following the 16-byte preamble are illegal and
115 constitute a failure in instruction decoding. This all assumes
116 that the preamble will never occur except in specific code
117 fragments designed for Valgrind to catch.
119 No prefixes may precede a "Special" instruction.
122 /* casLE (implementation of lock-prefixed insns) and rep-prefixed
123 insns: the side-exit back to the start of the insn is done with
124 Ijk_Boring. This is quite wrong, it should be done with
125 Ijk_NoRedir, since otherwise the side exit, which is intended to
126 restart the instruction for whatever reason, could go somewhere
127 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
128 no-redir jumps performance critical, at least for rep-prefixed
129 instructions, since all iterations thereof would involve such a
130 jump. It's not such a big deal with casLE since the side exit is
131 only taken if the CAS fails, that is, the location is contended,
132 which is relatively unlikely.
134 Note also, the test for CAS success vs failure is done using
135 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
136 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
137 shouldn't definedness-check these comparisons. See
138 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
139 background/rationale.
142 /* LOCK prefixed instructions. These are translated using IR-level
143 CAS statements (IRCAS) and are believed to preserve atomicity, even
144 from the point of view of some other process racing against a
145 simulated one (presumably they communicate via a shared memory
148 Handlers which are aware of LOCK prefixes are:
149 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
150 dis_cmpxchg_G_E (cmpxchg)
151 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
155 dis_Grp8_Imm (bts, btc, btr)
156 dis_bt_G_E (bts, btc, btr)
161 #include "libvex_basictypes.h"
162 #include "libvex_ir.h"
164 #include "libvex_guest_amd64.h"
166 #include "main_util.h"
167 #include "main_globals.h"
168 #include "guest_generic_bb_to_IR.h"
169 #include "guest_generic_x87.h"
170 #include "guest_amd64_defs.h"
173 /*------------------------------------------------------------*/
175 /*------------------------------------------------------------*/
177 /* These are set at the start of the translation of an insn, right
178 down in disInstr_AMD64, so that we don't have to pass them around
179 endlessly. They are all constant during the translation of any
182 /* These are set at the start of the translation of a BB, so
183 that we don't have to pass them around endlessly. */
185 /* We need to know this to do sub-register accesses correctly. */
186 static VexEndness host_endness
;
188 /* Pointer to the guest code area (points to start of BB, not to the
189 insn being processed). */
190 static const UChar
* guest_code
;
192 /* The guest address corresponding to guest_code[0]. */
193 static Addr64 guest_RIP_bbstart
;
195 /* The guest address for the instruction currently being
197 static Addr64 guest_RIP_curr_instr
;
199 /* The IRSB* into which we're generating code. */
202 /* For ensuring that %rip-relative addressing is done right. A read
203 of %rip generates the address of the next instruction. It may be
204 that we don't conveniently know that inside disAMode(). For sanity
205 checking, if the next insn %rip is needed, we make a guess at what
206 it is, record that guess here, and set the accompanying Bool to
207 indicate that -- after this insn's decode is finished -- that guess
208 needs to be checked. */
210 /* At the start of each insn decode, is set to (0, False).
211 After the decode, if _mustcheck is now True, _assumed is
214 static Addr64 guest_RIP_next_assumed
;
215 static Bool guest_RIP_next_mustcheck
;
218 /*------------------------------------------------------------*/
219 /*--- Helpers for constructing IR. ---*/
220 /*------------------------------------------------------------*/
222 /* Generate a new temporary of the given type. */
223 static IRTemp
newTemp ( IRType ty
)
225 vassert(isPlausibleIRType(ty
));
226 return newIRTemp( irsb
->tyenv
, ty
);
229 /* Add a statement to the list held by "irsb". */
230 static void stmt ( IRStmt
* st
)
232 addStmtToIRSB( irsb
, st
);
235 /* Generate a statement "dst := e". */
236 static void assign ( IRTemp dst
, IRExpr
* e
)
238 stmt( IRStmt_WrTmp(dst
, e
) );
241 static IRExpr
* unop ( IROp op
, IRExpr
* a
)
243 return IRExpr_Unop(op
, a
);
246 static IRExpr
* binop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
)
248 return IRExpr_Binop(op
, a1
, a2
);
251 static IRExpr
* triop ( IROp op
, IRExpr
* a1
, IRExpr
* a2
, IRExpr
* a3
)
253 return IRExpr_Triop(op
, a1
, a2
, a3
);
256 static IRExpr
* mkexpr ( IRTemp tmp
)
258 return IRExpr_RdTmp(tmp
);
261 static IRExpr
* mkU8 ( ULong i
)
264 return IRExpr_Const(IRConst_U8( (UChar
)i
));
267 static IRExpr
* mkU16 ( ULong i
)
269 vassert(i
< 0x10000ULL
);
270 return IRExpr_Const(IRConst_U16( (UShort
)i
));
273 static IRExpr
* mkU32 ( ULong i
)
275 vassert(i
< 0x100000000ULL
);
276 return IRExpr_Const(IRConst_U32( (UInt
)i
));
279 static IRExpr
* mkU64 ( ULong i
)
281 return IRExpr_Const(IRConst_U64(i
));
284 static IRExpr
* mkU ( IRType ty
, ULong i
)
287 case Ity_I8
: return mkU8(i
);
288 case Ity_I16
: return mkU16(i
);
289 case Ity_I32
: return mkU32(i
);
290 case Ity_I64
: return mkU64(i
);
291 default: vpanic("mkU(amd64)");
295 static void storeLE ( IRExpr
* addr
, IRExpr
* data
)
297 stmt( IRStmt_Store(Iend_LE
, addr
, data
) );
300 static IRExpr
* loadLE ( IRType ty
, IRExpr
* addr
)
302 return IRExpr_Load(Iend_LE
, ty
, addr
);
305 static IROp
mkSizedOp ( IRType ty
, IROp op8
)
307 vassert(op8
== Iop_Add8
|| op8
== Iop_Sub8
309 || op8
== Iop_Or8
|| op8
== Iop_And8
|| op8
== Iop_Xor8
310 || op8
== Iop_Shl8
|| op8
== Iop_Shr8
|| op8
== Iop_Sar8
311 || op8
== Iop_CmpEQ8
|| op8
== Iop_CmpNE8
312 || op8
== Iop_CasCmpNE8
313 || op8
== Iop_Not8
);
315 case Ity_I8
: return 0 +op8
;
316 case Ity_I16
: return 1 +op8
;
317 case Ity_I32
: return 2 +op8
;
318 case Ity_I64
: return 3 +op8
;
319 default: vpanic("mkSizedOp(amd64)");
324 IRExpr
* doScalarWidening ( Int szSmall
, Int szBig
, Bool signd
, IRExpr
* src
)
326 if (szSmall
== 1 && szBig
== 4) {
327 return unop(signd
? Iop_8Sto32
: Iop_8Uto32
, src
);
329 if (szSmall
== 1 && szBig
== 2) {
330 return unop(signd
? Iop_8Sto16
: Iop_8Uto16
, src
);
332 if (szSmall
== 2 && szBig
== 4) {
333 return unop(signd
? Iop_16Sto32
: Iop_16Uto32
, src
);
335 if (szSmall
== 1 && szBig
== 8 && !signd
) {
336 return unop(Iop_8Uto64
, src
);
338 if (szSmall
== 1 && szBig
== 8 && signd
) {
339 return unop(Iop_8Sto64
, src
);
341 if (szSmall
== 2 && szBig
== 8 && !signd
) {
342 return unop(Iop_16Uto64
, src
);
344 if (szSmall
== 2 && szBig
== 8 && signd
) {
345 return unop(Iop_16Sto64
, src
);
347 vpanic("doScalarWidening(amd64)");
351 void putGuarded ( Int gstOffB
, IRExpr
* guard
, IRExpr
* value
)
353 IRType ty
= typeOfIRExpr(irsb
->tyenv
, value
);
354 stmt( IRStmt_Put(gstOffB
,
355 IRExpr_ITE(guard
, value
, IRExpr_Get(gstOffB
, ty
))) );
359 /*------------------------------------------------------------*/
360 /*--- Debugging output ---*/
361 /*------------------------------------------------------------*/
363 /* Bomb out if we can't handle something. */
364 __attribute__ ((noreturn
))
365 static void unimplemented ( const HChar
* str
)
367 vex_printf("amd64toIR: unimplemented feature\n");
/* Print a disassembly trace line, but only when front-end tracing is
   enabled. */
#define DIP(format, args...) \
   if (vex_traceflags & VEX_TRACE_FE) \
      vex_printf(format, ## args)

/* Format into "buf", but only when front-end tracing is enabled. */
#define DIS(buf, format, args...) \
   if (vex_traceflags & VEX_TRACE_FE) \
      vex_sprintf(buf, format, ## args)
380 /*------------------------------------------------------------*/
381 /*--- Offsets of various parts of the amd64 guest state. ---*/
382 /*------------------------------------------------------------*/
/* Integer registers. */
#define OFFB_RAX       offsetof(VexGuestAMD64State,guest_RAX)
#define OFFB_RBX       offsetof(VexGuestAMD64State,guest_RBX)
#define OFFB_RCX       offsetof(VexGuestAMD64State,guest_RCX)
#define OFFB_RDX       offsetof(VexGuestAMD64State,guest_RDX)
#define OFFB_RSP       offsetof(VexGuestAMD64State,guest_RSP)
#define OFFB_RBP       offsetof(VexGuestAMD64State,guest_RBP)
#define OFFB_RSI       offsetof(VexGuestAMD64State,guest_RSI)
#define OFFB_RDI       offsetof(VexGuestAMD64State,guest_RDI)
#define OFFB_R8        offsetof(VexGuestAMD64State,guest_R8)
#define OFFB_R9        offsetof(VexGuestAMD64State,guest_R9)
#define OFFB_R10       offsetof(VexGuestAMD64State,guest_R10)
#define OFFB_R11       offsetof(VexGuestAMD64State,guest_R11)
#define OFFB_R12       offsetof(VexGuestAMD64State,guest_R12)
#define OFFB_R13       offsetof(VexGuestAMD64State,guest_R13)
#define OFFB_R14       offsetof(VexGuestAMD64State,guest_R14)
#define OFFB_R15       offsetof(VexGuestAMD64State,guest_R15)

#define OFFB_RIP       offsetof(VexGuestAMD64State,guest_RIP)

/* Segment-base constants. */
#define OFFB_FS_CONST  offsetof(VexGuestAMD64State,guest_FS_CONST)
#define OFFB_GS_CONST  offsetof(VexGuestAMD64State,guest_GS_CONST)

/* Thunk for lazy condition-code evaluation. */
#define OFFB_CC_OP     offsetof(VexGuestAMD64State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestAMD64State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestAMD64State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestAMD64State,guest_CC_NDEP)

/* x87 FPU state and sundry flags. */
#define OFFB_FPREGS    offsetof(VexGuestAMD64State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestAMD64State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestAMD64State,guest_DFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestAMD64State,guest_ACFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestAMD64State,guest_IDFLAG)
#define OFFB_FTOP      offsetof(VexGuestAMD64State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestAMD64State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestAMD64State,guest_FPROUND)

/* SSE/AVX state.  YMM16 is a scratch pseudo-register. */
#define OFFB_SSEROUND  offsetof(VexGuestAMD64State,guest_SSEROUND)
#define OFFB_YMM0      offsetof(VexGuestAMD64State,guest_YMM0)
#define OFFB_YMM1      offsetof(VexGuestAMD64State,guest_YMM1)
#define OFFB_YMM2      offsetof(VexGuestAMD64State,guest_YMM2)
#define OFFB_YMM3      offsetof(VexGuestAMD64State,guest_YMM3)
#define OFFB_YMM4      offsetof(VexGuestAMD64State,guest_YMM4)
#define OFFB_YMM5      offsetof(VexGuestAMD64State,guest_YMM5)
#define OFFB_YMM6      offsetof(VexGuestAMD64State,guest_YMM6)
#define OFFB_YMM7      offsetof(VexGuestAMD64State,guest_YMM7)
#define OFFB_YMM8      offsetof(VexGuestAMD64State,guest_YMM8)
#define OFFB_YMM9      offsetof(VexGuestAMD64State,guest_YMM9)
#define OFFB_YMM10     offsetof(VexGuestAMD64State,guest_YMM10)
#define OFFB_YMM11     offsetof(VexGuestAMD64State,guest_YMM11)
#define OFFB_YMM12     offsetof(VexGuestAMD64State,guest_YMM12)
#define OFFB_YMM13     offsetof(VexGuestAMD64State,guest_YMM13)
#define OFFB_YMM14     offsetof(VexGuestAMD64State,guest_YMM14)
#define OFFB_YMM15     offsetof(VexGuestAMD64State,guest_YMM15)
#define OFFB_YMM16     offsetof(VexGuestAMD64State,guest_YMM16)

/* Emulation notes / cache-management / no-redirect support. */
#define OFFB_EMNOTE    offsetof(VexGuestAMD64State,guest_EMNOTE)
#define OFFB_CMSTART   offsetof(VexGuestAMD64State,guest_CMSTART)
#define OFFB_CMLEN     offsetof(VexGuestAMD64State,guest_CMLEN)

#define OFFB_NRADDR    offsetof(VexGuestAMD64State,guest_NRADDR)
446 /*------------------------------------------------------------*/
447 /*--- Helper bits and pieces for deconstructing the ---*/
448 /*--- amd64 insn stream. ---*/
449 /*------------------------------------------------------------*/
451 /* This is the AMD64 register encoding -- integer regs. */
469 /* This is the Intel register encoding -- segment regs. */
478 /* Various simple conversions */
480 static ULong
extend_s_8to64 ( UChar x
)
482 return (ULong
)((Long
)(((ULong
)x
) << 56) >> 56);
485 static ULong
extend_s_16to64 ( UShort x
)
487 return (ULong
)((Long
)(((ULong
)x
) << 48) >> 48);
490 static ULong
extend_s_32to64 ( UInt x
)
492 return (ULong
)((Long
)(((ULong
)x
) << 32) >> 32);
495 /* Figure out whether the mod and rm parts of a modRM byte refer to a
496 register or memory. If so, the byte will have the form 11XXXYYY,
497 where YYY is the register number. */
499 static Bool
epartIsReg ( UChar mod_reg_rm
)
501 return toBool(0xC0 == (mod_reg_rm
& 0xC0));
504 /* Extract the 'g' field from a modRM byte. This only produces 3
505 bits, which is not a complete register number. You should avoid
506 this function if at all possible. */
508 static Int
gregLO3ofRM ( UChar mod_reg_rm
)
510 return (Int
)( (mod_reg_rm
>> 3) & 7 );
513 /* Ditto the 'e' field of a modRM byte. */
515 static Int
eregLO3ofRM ( UChar mod_reg_rm
)
517 return (Int
)(mod_reg_rm
& 0x7);
520 /* Get a 8/16/32-bit unsigned value out of the insn stream. */
522 static inline UChar
getUChar ( Long delta
)
524 UChar v
= guest_code
[delta
+0];
528 static UInt
getUDisp16 ( Long delta
)
530 UInt v
= guest_code
[delta
+1]; v
<<= 8;
531 v
|= guest_code
[delta
+0];
535 //.. static UInt getUDisp ( Int size, Long delta )
538 //.. case 4: return getUDisp32(delta);
539 //.. case 2: return getUDisp16(delta);
540 //.. case 1: return getUChar(delta);
541 //.. default: vpanic("getUDisp(x86)");
543 //.. return 0; /*notreached*/
547 /* Get a byte value out of the insn stream and sign-extend to 64
549 static Long
getSDisp8 ( Long delta
)
551 return extend_s_8to64( guest_code
[delta
] );
554 /* Get a 16-bit value out of the insn stream and sign-extend to 64
556 static Long
getSDisp16 ( Long delta
)
558 UInt v
= guest_code
[delta
+1]; v
<<= 8;
559 v
|= guest_code
[delta
+0];
560 return extend_s_16to64( (UShort
)v
);
563 /* Get a 32-bit value out of the insn stream and sign-extend to 64
565 static Long
getSDisp32 ( Long delta
)
567 UInt v
= guest_code
[delta
+3]; v
<<= 8;
568 v
|= guest_code
[delta
+2]; v
<<= 8;
569 v
|= guest_code
[delta
+1]; v
<<= 8;
570 v
|= guest_code
[delta
+0];
571 return extend_s_32to64( v
);
574 /* Get a 64-bit value out of the insn stream. */
575 static Long
getDisp64 ( Long delta
)
578 v
|= guest_code
[delta
+7]; v
<<= 8;
579 v
|= guest_code
[delta
+6]; v
<<= 8;
580 v
|= guest_code
[delta
+5]; v
<<= 8;
581 v
|= guest_code
[delta
+4]; v
<<= 8;
582 v
|= guest_code
[delta
+3]; v
<<= 8;
583 v
|= guest_code
[delta
+2]; v
<<= 8;
584 v
|= guest_code
[delta
+1]; v
<<= 8;
585 v
|= guest_code
[delta
+0];
589 /* Note: because AMD64 doesn't allow 64-bit literals, it is an error
590 if this is called with size==8. Should not happen. */
591 static Long
getSDisp ( Int size
, Long delta
)
594 case 4: return getSDisp32(delta
);
595 case 2: return getSDisp16(delta
);
596 case 1: return getSDisp8(delta
);
597 default: vpanic("getSDisp(amd64)");
601 static ULong
mkSizeMask ( Int sz
)
604 case 1: return 0x00000000000000FFULL
;
605 case 2: return 0x000000000000FFFFULL
;
606 case 4: return 0x00000000FFFFFFFFULL
;
607 case 8: return 0xFFFFFFFFFFFFFFFFULL
;
608 default: vpanic("mkSzMask(amd64)");
612 static Int
imin ( Int a
, Int b
)
614 return (a
< b
) ? a
: b
;
617 static IRType
szToITy ( Int n
)
620 case 1: return Ity_I8
;
621 case 2: return Ity_I16
;
622 case 4: return Ity_I32
;
623 case 8: return Ity_I64
;
624 default: vex_printf("\nszToITy(%d)\n", n
);
625 vpanic("szToITy(amd64)");
630 /*------------------------------------------------------------*/
631 /*--- For dealing with prefixes. ---*/
632 /*------------------------------------------------------------*/
634 /* The idea is to pass around an int holding a bitmask summarising
635 info from the prefixes seen on the current instruction, including
636 info from the REX byte. This info is used in various places, but
637 most especially when making sense of register fields in
640 The top 8 bits of the prefix are 0x55, just as a hacky way to
641 ensure it really is a valid prefix.
643 Things you can safely assume about a well-formed prefix:
644 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
645 * if REX is not present then REXW,REXR,REXX,REXB will read
647 * F2 and F3 will not both be 1.
#define PFX_ASO    (1<<0)    /* address-size override present (0x67) */
#define PFX_66     (1<<1)    /* operand-size override-to-16 present (0x66) */
#define PFX_REX    (1<<2)    /* REX byte present (0x40 to 0x4F) */
#define PFX_REXW   (1<<3)    /* REX W bit, if REX present, else 0 */
#define PFX_REXR   (1<<4)    /* REX R bit, if REX present, else 0 */
#define PFX_REXX   (1<<5)    /* REX X bit, if REX present, else 0 */
#define PFX_REXB   (1<<6)    /* REX B bit, if REX present, else 0 */
#define PFX_LOCK   (1<<7)    /* bus LOCK prefix present (0xF0) */
#define PFX_F2     (1<<8)    /* REP/REPE/REPZ prefix present (0xF2) */
#define PFX_F3     (1<<9)    /* REPNE/REPNZ prefix present (0xF3) */
#define PFX_CS     (1<<10)   /* CS segment prefix present (0x2E) */
#define PFX_DS     (1<<11)   /* DS segment prefix present (0x3E) */
#define PFX_ES     (1<<12)   /* ES segment prefix present (0x26) */
#define PFX_FS     (1<<13)   /* FS segment prefix present (0x64) */
#define PFX_GS     (1<<14)   /* GS segment prefix present (0x65) */
#define PFX_SS     (1<<15)   /* SS segment prefix present (0x36) */
#define PFX_VEX    (1<<16)   /* VEX prefix present (0xC4 or 0xC5) */
#define PFX_VEXL   (1<<17)   /* VEX L bit, if VEX present, else 0 */
/* The extra register field VEX.vvvv is encoded (after not-ing it) as
   PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
   positions. */
#define PFX_VEXnV0 (1<<18)   /* ~VEX vvvv[0], if VEX present, else 0 */
#define PFX_VEXnV1 (1<<19)   /* ~VEX vvvv[1], if VEX present, else 0 */
#define PFX_VEXnV2 (1<<20)   /* ~VEX vvvv[2], if VEX present, else 0 */
#define PFX_VEXnV3 (1<<21)   /* ~VEX vvvv[3], if VEX present, else 0 */

/* Top byte of every well-formed Prefix value, purely as a sanity tag. */
#define PFX_EMPTY 0x55000000
681 static Bool
IS_VALID_PFX ( Prefix pfx
) {
682 return toBool((pfx
& 0xFF000000) == PFX_EMPTY
);
685 static Bool
haveREX ( Prefix pfx
) {
686 return toBool(pfx
& PFX_REX
);
689 static Int
getRexW ( Prefix pfx
) {
690 return (pfx
& PFX_REXW
) ? 1 : 0;
692 static Int
getRexR ( Prefix pfx
) {
693 return (pfx
& PFX_REXR
) ? 1 : 0;
695 static Int
getRexX ( Prefix pfx
) {
696 return (pfx
& PFX_REXX
) ? 1 : 0;
698 static Int
getRexB ( Prefix pfx
) {
699 return (pfx
& PFX_REXB
) ? 1 : 0;
702 /* Check a prefix doesn't have F2 or F3 set in it, since usually that
703 completely changes what instruction it really is. */
704 static Bool
haveF2orF3 ( Prefix pfx
) {
705 return toBool((pfx
& (PFX_F2
|PFX_F3
)) > 0);
707 static Bool
haveF2andF3 ( Prefix pfx
) {
708 return toBool((pfx
& (PFX_F2
|PFX_F3
)) == (PFX_F2
|PFX_F3
));
710 static Bool
haveF2 ( Prefix pfx
) {
711 return toBool((pfx
& PFX_F2
) > 0);
713 static Bool
haveF3 ( Prefix pfx
) {
714 return toBool((pfx
& PFX_F3
) > 0);
717 static Bool
have66 ( Prefix pfx
) {
718 return toBool((pfx
& PFX_66
) > 0);
720 static Bool
haveASO ( Prefix pfx
) {
721 return toBool((pfx
& PFX_ASO
) > 0);
723 static Bool
haveLOCK ( Prefix pfx
) {
724 return toBool((pfx
& PFX_LOCK
) > 0);
727 /* Return True iff pfx has 66 set and F2 and F3 clear */
728 static Bool
have66noF2noF3 ( Prefix pfx
)
731 toBool((pfx
& (PFX_66
|PFX_F2
|PFX_F3
)) == PFX_66
);
734 /* Return True iff pfx has F2 set and 66 and F3 clear */
735 static Bool
haveF2no66noF3 ( Prefix pfx
)
738 toBool((pfx
& (PFX_66
|PFX_F2
|PFX_F3
)) == PFX_F2
);
741 /* Return True iff pfx has F3 set and 66 and F2 clear */
742 static Bool
haveF3no66noF2 ( Prefix pfx
)
745 toBool((pfx
& (PFX_66
|PFX_F2
|PFX_F3
)) == PFX_F3
);
748 /* Return True iff pfx has F3 set and F2 clear */
749 static Bool
haveF3noF2 ( Prefix pfx
)
752 toBool((pfx
& (PFX_F2
|PFX_F3
)) == PFX_F3
);
755 /* Return True iff pfx has F2 set and F3 clear */
756 static Bool
haveF2noF3 ( Prefix pfx
)
759 toBool((pfx
& (PFX_F2
|PFX_F3
)) == PFX_F2
);
762 /* Return True iff pfx has F2 and F3 clear */
763 static Bool
haveNoF2noF3 ( Prefix pfx
)
766 toBool((pfx
& (PFX_F2
|PFX_F3
)) == 0);
769 /* Return True iff pfx has 66, F2 and F3 clear */
770 static Bool
haveNo66noF2noF3 ( Prefix pfx
)
773 toBool((pfx
& (PFX_66
|PFX_F2
|PFX_F3
)) == 0);
776 /* Return True iff pfx has any of 66, F2 and F3 set */
777 static Bool
have66orF2orF3 ( Prefix pfx
)
779 return toBool( ! haveNo66noF2noF3(pfx
) );
782 /* Return True iff pfx has 66 or F3 set */
783 static Bool
have66orF3 ( Prefix pfx
)
785 return toBool((pfx
& (PFX_66
|PFX_F3
)) > 0);
788 /* Clear all the segment-override bits in a prefix. */
789 static Prefix
clearSegBits ( Prefix p
)
792 p
& ~(PFX_CS
| PFX_DS
| PFX_ES
| PFX_FS
| PFX_GS
| PFX_SS
);
795 /* Get the (inverted, hence back to "normal") VEX.vvvv field. */
796 static UInt
getVexNvvvv ( Prefix pfx
) {
798 r
/= (UInt
)PFX_VEXnV0
; /* pray this turns into a shift */
802 static Bool
haveVEX ( Prefix pfx
) {
803 return toBool(pfx
& PFX_VEX
);
806 static Int
getVexL ( Prefix pfx
) {
807 return (pfx
& PFX_VEXL
) ? 1 : 0;
811 /*------------------------------------------------------------*/
812 /*--- For dealing with escapes ---*/
813 /*------------------------------------------------------------*/
816 /* Escapes come after the prefixes, but before the primary opcode
817 byte. They escape the primary opcode byte into a bigger space.
818 The 0xF0000000 isn't significant, except so as to make it not
819 overlap valid Prefix values, for sanity checking.
824 ESC_NONE
=0xF0000000, // none
832 /*------------------------------------------------------------*/
833 /*--- For dealing with integer registers ---*/
834 /*------------------------------------------------------------*/
836 /* This is somewhat complex. The rules are:
838 For 64, 32 and 16 bit register references, the e or g fields in the
839 modrm bytes supply the low 3 bits of the register number. The
840 fourth (most-significant) bit of the register number is supplied by
841 the REX byte, if it is present; else that bit is taken to be zero.
843 The REX.R bit supplies the high bit corresponding to the g register
844 field, and the REX.B bit supplies the high bit corresponding to the
845 e register field (when the mod part of modrm indicates that modrm's
846 e component refers to a register and not to memory).
848 The REX.X bit supplies a high register bit for certain registers
849 in SIB address modes, and is generally rarely used.
851 For 8 bit register references, the presence of the REX byte itself
852 has significance. If there is no REX present, then the 3-bit
853 number extracted from the modrm e or g field is treated as an index
854 into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
855 old x86 encoding scheme.
857 But if there is a REX present, the register reference is
858 interpreted in the same way as for 64/32/16-bit references: a high
859 bit is extracted from REX, giving a 4-bit number, and the denoted
860 register is the lowest 8 bits of the 16 integer registers denoted
861 by the number. In particular, values 3 through 7 of this sequence
862 do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
865 The REX.W bit has no bearing at all on register numbers. Instead
866 its presence indicates that the operand size is to be overridden
867 from its default value (32 bits) to 64 bits instead. This is in
868 the same fashion that an 0x66 prefix indicates the operand size is
869 to be overridden from 32 bits down to 16 bits. When both REX.W and
870 0x66 are present there is a conflict, and REX.W takes precedence.
872 Rather than try to handle this complexity using a single huge
873 function, several smaller ones are provided. The aim is to make it
874 as difficult as possible to screw up register decoding in a subtle
875 and hard-to-track-down way.
877 Because these routines fish around in the host's memory (that is,
878 in the guest state area) for sub-parts of guest registers, their
879 correctness depends on the host's endianness. So far these
880 routines only work for little-endian hosts. Those for which
881 endianness is important have assertions to ensure sanity.
885 /* About the simplest question you can ask: where do the 64-bit
886 integer registers live (in the guest state) ? */
888 static Int
integerGuestReg64Offset ( UInt reg
)
891 case R_RAX
: return OFFB_RAX
;
892 case R_RCX
: return OFFB_RCX
;
893 case R_RDX
: return OFFB_RDX
;
894 case R_RBX
: return OFFB_RBX
;
895 case R_RSP
: return OFFB_RSP
;
896 case R_RBP
: return OFFB_RBP
;
897 case R_RSI
: return OFFB_RSI
;
898 case R_RDI
: return OFFB_RDI
;
899 case R_R8
: return OFFB_R8
;
900 case R_R9
: return OFFB_R9
;
901 case R_R10
: return OFFB_R10
;
902 case R_R11
: return OFFB_R11
;
903 case R_R12
: return OFFB_R12
;
904 case R_R13
: return OFFB_R13
;
905 case R_R14
: return OFFB_R14
;
906 case R_R15
: return OFFB_R15
;
907 default: vpanic("integerGuestReg64Offset(amd64)");
912 /* Produce the name of an integer register, for printing purposes.
913 reg is a number in the range 0 .. 15 that has been generated from a
914 3-bit reg-field number and a REX extension bit. irregular denotes
915 the case where sz==1 and no REX byte is present and where the denoted
916 sub-register is bits 15:8 of the containing 64-bit register. */
919 const HChar
* nameIReg ( Int sz
, UInt reg
, Bool irregular
)
921 static const HChar
* ireg64_names
[16]
922 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
923 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
924 static const HChar
* ireg32_names
[16]
925 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
926 "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
927 static const HChar
* ireg16_names
[16]
928 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
929 "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
930 static const HChar
* ireg8_names
[16]
931 = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
932 "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
933 static const HChar
* ireg8_irregular
[4]
934 = { "%ah", "%ch", "%dh", "%bh" };
939 vassert(reg
>= 4 && reg
< 8);
941 vassert(irregular
== False
);
945 case 8: return ireg64_names
[reg
];
946 case 4: return ireg32_names
[reg
];
947 case 2: return ireg16_names
[reg
];
948 case 1: if (irregular
) {
949 vassert(reg
>= 4 && reg
< 8);
950 return ireg8_irregular
[reg
- 4];
952 return ireg8_names
[reg
];
954 default: vpanic("nameIReg(amd64)");
958 /* Using the same argument conventions as nameIReg, produce the
959 guest state offset of an integer register. */
962 Int
offsetIReg ( Int sz
, UInt reg
, Bool irregular
)
967 vassert(reg
>= 4 && reg
< 8);
969 vassert(irregular
== False
);
972 /* Deal with irregular case -- sz==1 and no REX present */
973 if (sz
== 1 && irregular
) {
975 case R_RSP
: return 1+ OFFB_RAX
;
976 case R_RBP
: return 1+ OFFB_RCX
;
977 case R_RSI
: return 1+ OFFB_RDX
;
978 case R_RDI
: return 1+ OFFB_RBX
;
979 default: break; /* use the normal case */
984 return integerGuestReg64Offset(reg
);
988 /* Read the %CL register :: Ity_I8, for shift/rotate operations. */
990 static IRExpr
* getIRegCL ( void )
992 vassert(host_endness
== VexEndnessLE
);
993 return unop(Iop_64to8
, IRExpr_Get( OFFB_RCX
, Ity_I64
));
997 /* Write to the %AH register. */
999 static void putIRegAH ( IRExpr
* e
)
1001 vassert(host_endness
== VexEndnessLE
);
1002 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I8
);
1003 stmt( IRStmt_Put( OFFB_RAX
+1, e
) );
1007 /* Read/write various widths of %RAX, as it has various
1008 special-purpose uses. */
1010 static const HChar
* nameIRegRAX ( Int sz
)
1013 case 1: return "%al";
1014 case 2: return "%ax";
1015 case 4: return "%eax";
1016 case 8: return "%rax";
1017 default: vpanic("nameIRegRAX(amd64)");
1021 static IRExpr
* getIRegRAX ( Int sz
)
1023 vassert(host_endness
== VexEndnessLE
);
1025 case 1: return unop(Iop_64to8
, IRExpr_Get( OFFB_RAX
, Ity_I64
));
1026 case 2: return unop(Iop_64to16
, IRExpr_Get( OFFB_RAX
, Ity_I64
));
1027 case 4: return unop(Iop_64to32
, IRExpr_Get( OFFB_RAX
, Ity_I64
));
1028 case 8: return IRExpr_Get( OFFB_RAX
, Ity_I64
);
1029 default: vpanic("getIRegRAX(amd64)");
1033 static void putIRegRAX ( Int sz
, IRExpr
* e
)
1035 IRType ty
= typeOfIRExpr(irsb
->tyenv
, e
);
1036 vassert(host_endness
== VexEndnessLE
);
1038 case 8: vassert(ty
== Ity_I64
);
1039 stmt( IRStmt_Put( OFFB_RAX
, e
));
1041 case 4: vassert(ty
== Ity_I32
);
1042 stmt( IRStmt_Put( OFFB_RAX
, unop(Iop_32Uto64
,e
) ));
1044 case 2: vassert(ty
== Ity_I16
);
1045 stmt( IRStmt_Put( OFFB_RAX
, e
));
1047 case 1: vassert(ty
== Ity_I8
);
1048 stmt( IRStmt_Put( OFFB_RAX
, e
));
1050 default: vpanic("putIRegRAX(amd64)");
1055 /* Read/write various widths of %RDX, as it has various
1056 special-purpose uses. */
1058 static const HChar
* nameIRegRDX ( Int sz
)
1061 case 1: return "%dl";
1062 case 2: return "%dx";
1063 case 4: return "%edx";
1064 case 8: return "%rdx";
1065 default: vpanic("nameIRegRDX(amd64)");
1069 static IRExpr
* getIRegRDX ( Int sz
)
1071 vassert(host_endness
== VexEndnessLE
);
1073 case 1: return unop(Iop_64to8
, IRExpr_Get( OFFB_RDX
, Ity_I64
));
1074 case 2: return unop(Iop_64to16
, IRExpr_Get( OFFB_RDX
, Ity_I64
));
1075 case 4: return unop(Iop_64to32
, IRExpr_Get( OFFB_RDX
, Ity_I64
));
1076 case 8: return IRExpr_Get( OFFB_RDX
, Ity_I64
);
1077 default: vpanic("getIRegRDX(amd64)");
1081 static void putIRegRDX ( Int sz
, IRExpr
* e
)
1083 vassert(host_endness
== VexEndnessLE
);
1084 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == szToITy(sz
));
1086 case 8: stmt( IRStmt_Put( OFFB_RDX
, e
));
1088 case 4: stmt( IRStmt_Put( OFFB_RDX
, unop(Iop_32Uto64
,e
) ));
1090 case 2: stmt( IRStmt_Put( OFFB_RDX
, e
));
1092 case 1: stmt( IRStmt_Put( OFFB_RDX
, e
));
1094 default: vpanic("putIRegRDX(amd64)");
1099 /* Simplistic functions to deal with the integer registers as a
1100 straightforward bank of 16 64-bit regs. */
1102 static IRExpr
* getIReg64 ( UInt regno
)
1104 return IRExpr_Get( integerGuestReg64Offset(regno
),
1108 static void putIReg64 ( UInt regno
, IRExpr
* e
)
1110 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I64
);
1111 stmt( IRStmt_Put( integerGuestReg64Offset(regno
), e
) );
1114 static const HChar
* nameIReg64 ( UInt regno
)
1116 return nameIReg( 8, regno
, False
);
1120 /* Simplistic functions to deal with the lower halves of integer
1121 registers as a straightforward bank of 16 32-bit regs. */
1123 static IRExpr
* getIReg32 ( UInt regno
)
1125 vassert(host_endness
== VexEndnessLE
);
1126 return unop(Iop_64to32
,
1127 IRExpr_Get( integerGuestReg64Offset(regno
),
1131 static void putIReg32 ( UInt regno
, IRExpr
* e
)
1133 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I32
);
1134 stmt( IRStmt_Put( integerGuestReg64Offset(regno
),
1135 unop(Iop_32Uto64
,e
) ) );
1138 static const HChar
* nameIReg32 ( UInt regno
)
1140 return nameIReg( 4, regno
, False
);
1144 /* Simplistic functions to deal with the lower quarters of integer
1145 registers as a straightforward bank of 16 16-bit regs. */
1147 static IRExpr
* getIReg16 ( UInt regno
)
1149 vassert(host_endness
== VexEndnessLE
);
1150 return unop(Iop_64to16
,
1151 IRExpr_Get( integerGuestReg64Offset(regno
),
1155 static void putIReg16 ( UInt regno
, IRExpr
* e
)
1157 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I16
);
1158 stmt( IRStmt_Put( integerGuestReg64Offset(regno
),
1159 unop(Iop_16Uto64
,e
) ) );
1162 static const HChar
* nameIReg16 ( UInt regno
)
1164 return nameIReg( 2, regno
, False
);
1168 /* Sometimes what we know is a 3-bit register number, a REX byte, and
1169 which field of the REX byte is to be used to extend to a 4-bit
1170 number. These functions cater for that situation.
1172 static IRExpr
* getIReg64rexX ( Prefix pfx
, UInt lo3bits
)
1174 vassert(lo3bits
< 8);
1175 vassert(IS_VALID_PFX(pfx
));
1176 return getIReg64( lo3bits
| (getRexX(pfx
) << 3) );
1179 static const HChar
* nameIReg64rexX ( Prefix pfx
, UInt lo3bits
)
1181 vassert(lo3bits
< 8);
1182 vassert(IS_VALID_PFX(pfx
));
1183 return nameIReg( 8, lo3bits
| (getRexX(pfx
) << 3), False
);
1186 static const HChar
* nameIRegRexB ( Int sz
, Prefix pfx
, UInt lo3bits
)
1188 vassert(lo3bits
< 8);
1189 vassert(IS_VALID_PFX(pfx
));
1190 vassert(sz
== 8 || sz
== 4 || sz
== 2 || sz
== 1);
1191 UInt regNo
= lo3bits
| (getRexB(pfx
) << 3);
1192 return nameIReg( sz
, regNo
,
1193 toBool(sz
==1 && !haveREX(pfx
) && regNo
>= 4 && regNo
< 8));
1196 static IRExpr
* getIRegRexB ( Int sz
, Prefix pfx
, UInt lo3bits
)
1198 vassert(lo3bits
< 8);
1199 vassert(IS_VALID_PFX(pfx
));
1200 UInt regNo
= (getRexB(pfx
) << 3) | lo3bits
;
1204 offsetIReg( 8, regNo
, False
/*!irregular*/ ),
1209 return unop(Iop_64to32
,
1211 offsetIReg( 8, regNo
, False
/*!irregular*/ ),
1216 return unop(Iop_64to16
,
1218 offsetIReg( 8, regNo
, False
/*!irregular*/ ),
1223 Bool irregular
= !haveREX(pfx
) && regNo
>= 4 && regNo
< 8;
1226 offsetIReg( 1, regNo
, True
/*irregular*/ ),
1230 return unop(Iop_64to8
,
1232 offsetIReg( 8, regNo
, False
/*!irregular*/ ),
1238 vpanic("getIRegRexB");
1243 static void putIRegRexB ( Int sz
, Prefix pfx
, UInt lo3bits
, IRExpr
* e
)
1245 vassert(lo3bits
< 8);
1246 vassert(IS_VALID_PFX(pfx
));
1247 vassert(sz
== 8 || sz
== 4 || sz
== 2 || sz
== 1);
1248 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == szToITy(sz
));
1249 Bool irregular
= sz
== 1 && !haveREX(pfx
) && lo3bits
>= 4 && lo3bits
< 8;
1251 offsetIReg( sz
, lo3bits
| (getRexB(pfx
) << 3), irregular
),
1252 sz
==4 ? unop(Iop_32Uto64
,e
) : e
1257 /* Functions for getting register numbers from modrm bytes and REX
1258 when we don't have to consider the complexities of integer subreg
1261 /* Extract the g reg field from a modRM byte, and augment it using the
1262 REX.R bit from the supplied REX byte. The R bit usually is
1263 associated with the g register field.
1265 static UInt
gregOfRexRM ( Prefix pfx
, UChar mod_reg_rm
)
1267 Int reg
= (Int
)( (mod_reg_rm
>> 3) & 7 );
1268 reg
+= (pfx
& PFX_REXR
) ? 8 : 0;
1272 /* Extract the e reg field from a modRM byte, and augment it using the
1273 REX.B bit from the supplied REX byte. The B bit usually is
1274 associated with the e register field (when modrm indicates e is a
1277 static UInt
eregOfRexRM ( Prefix pfx
, UChar mod_reg_rm
)
1280 vassert(epartIsReg(mod_reg_rm
));
1281 rm
= (Int
)(mod_reg_rm
& 0x7);
1282 rm
+= (pfx
& PFX_REXB
) ? 8 : 0;
1287 /* General functions for dealing with integer register access. */
1289 /* Produce the guest state offset for a reference to the 'g' register
1290 field in a modrm byte, taking into account REX (or its absence),
1291 and the size of the access.
1293 static UInt
offsetIRegG ( Int sz
, Prefix pfx
, UChar mod_reg_rm
)
1296 vassert(host_endness
== VexEndnessLE
);
1297 vassert(IS_VALID_PFX(pfx
));
1298 vassert(sz
== 8 || sz
== 4 || sz
== 2 || sz
== 1);
1299 reg
= gregOfRexRM( pfx
, mod_reg_rm
);
1300 Bool irregular
= sz
== 1 && !haveREX(pfx
) && reg
>= 4 && reg
< 8;
1301 return offsetIReg( sz
, reg
, irregular
);
1305 IRExpr
* getIRegG ( Int sz
, Prefix pfx
, UChar mod_reg_rm
)
1309 return IRExpr_Get( offsetIRegG( 8, pfx
, mod_reg_rm
), Ity_I64
);
1312 return unop(Iop_64to32
,
1313 IRExpr_Get( offsetIRegG( 8, pfx
, mod_reg_rm
), Ity_I64
));
1316 return unop(Iop_64to16
,
1317 IRExpr_Get( offsetIRegG( 8, pfx
, mod_reg_rm
), Ity_I64
));
1320 UInt regNo
= gregOfRexRM( pfx
, mod_reg_rm
);
1321 Bool irregular
= !haveREX(pfx
) && regNo
>= 4 && regNo
< 8;
1323 return IRExpr_Get( offsetIRegG( 1, pfx
, mod_reg_rm
), Ity_I8
);
1325 return unop(Iop_64to8
,
1326 IRExpr_Get( offsetIRegG( 8, pfx
, mod_reg_rm
),
1337 void putIRegG ( Int sz
, Prefix pfx
, UChar mod_reg_rm
, IRExpr
* e
)
1339 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == szToITy(sz
));
1341 e
= unop(Iop_32Uto64
,e
);
1343 stmt( IRStmt_Put( offsetIRegG( sz
, pfx
, mod_reg_rm
), e
) );
1347 const HChar
* nameIRegG ( Int sz
, Prefix pfx
, UChar mod_reg_rm
)
1349 UInt regNo
= gregOfRexRM( pfx
, mod_reg_rm
);
1350 Bool irregular
= sz
== 1 && !haveREX(pfx
) && regNo
>= 4 && regNo
< 8;
1351 return nameIReg( sz
, gregOfRexRM(pfx
,mod_reg_rm
), irregular
);
1356 IRExpr
* getIRegV ( Int sz
, Prefix pfx
)
1358 vassert(sz
== 8 || sz
== 4);
1360 return unop(Iop_64to32
,
1361 IRExpr_Get( offsetIReg( 8, getVexNvvvv(pfx
), False
),
1363 } else if (sz
== 2) {
1364 return unop(Iop_64to16
,
1365 IRExpr_Get( offsetIReg( 8, getVexNvvvv(pfx
), False
),
1368 return IRExpr_Get( offsetIReg( sz
, getVexNvvvv(pfx
), False
),
1374 void putIRegV ( Int sz
, Prefix pfx
, IRExpr
* e
)
1376 vassert(sz
== 8 || sz
== 4);
1377 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == szToITy(sz
));
1379 e
= unop(Iop_32Uto64
,e
);
1381 stmt( IRStmt_Put( offsetIReg( sz
, getVexNvvvv(pfx
), False
), e
) );
1385 const HChar
* nameIRegV ( Int sz
, Prefix pfx
)
1387 vassert(sz
== 8 || sz
== 4);
1388 return nameIReg( sz
, getVexNvvvv(pfx
), False
);
1393 /* Produce the guest state offset for a reference to the 'e' register
1394 field in a modrm byte, taking into account REX (or its absence),
1395 and the size of the access. eregOfRexRM will assert if mod_reg_rm
1396 denotes a memory access rather than a register access.
1398 static UInt
offsetIRegE ( Int sz
, Prefix pfx
, UChar mod_reg_rm
)
1401 vassert(host_endness
== VexEndnessLE
);
1402 vassert(IS_VALID_PFX(pfx
));
1403 vassert(sz
== 8 || sz
== 4 || sz
== 2 || sz
== 1);
1404 reg
= eregOfRexRM( pfx
, mod_reg_rm
);
1405 Bool irregular
= sz
== 1 && !haveREX(pfx
) && (reg
>= 4 && reg
< 8);
1406 return offsetIReg( sz
, reg
, irregular
);
1410 IRExpr
* getIRegE ( Int sz
, Prefix pfx
, UChar mod_reg_rm
)
1414 return IRExpr_Get( offsetIRegE( 8, pfx
, mod_reg_rm
), Ity_I64
);
1417 return unop(Iop_64to32
,
1418 IRExpr_Get( offsetIRegE( 8, pfx
, mod_reg_rm
), Ity_I64
));
1421 return unop(Iop_64to16
,
1422 IRExpr_Get( offsetIRegE( 8, pfx
, mod_reg_rm
), Ity_I64
));
1425 UInt regNo
= eregOfRexRM( pfx
, mod_reg_rm
);
1426 Bool irregular
= !haveREX(pfx
) && regNo
>= 4 && regNo
< 8;
1428 return IRExpr_Get( offsetIRegE( 1, pfx
, mod_reg_rm
), Ity_I8
);
1430 return unop(Iop_64to8
,
1431 IRExpr_Get( offsetIRegE( 8, pfx
, mod_reg_rm
),
1442 void putIRegE ( Int sz
, Prefix pfx
, UChar mod_reg_rm
, IRExpr
* e
)
1444 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == szToITy(sz
));
1446 e
= unop(Iop_32Uto64
,e
);
1448 stmt( IRStmt_Put( offsetIRegE( sz
, pfx
, mod_reg_rm
), e
) );
1452 const HChar
* nameIRegE ( Int sz
, Prefix pfx
, UChar mod_reg_rm
)
1454 UInt regNo
= eregOfRexRM( pfx
, mod_reg_rm
);
1455 Bool irregular
= sz
== 1 && !haveREX(pfx
) && regNo
>= 4 && regNo
< 8;
1456 return nameIReg( sz
, eregOfRexRM(pfx
,mod_reg_rm
), irregular
);
1460 /*------------------------------------------------------------*/
1461 /*--- For dealing with XMM registers ---*/
1462 /*------------------------------------------------------------*/
1464 static Int
ymmGuestRegOffset ( UInt ymmreg
)
1467 case 0: return OFFB_YMM0
;
1468 case 1: return OFFB_YMM1
;
1469 case 2: return OFFB_YMM2
;
1470 case 3: return OFFB_YMM3
;
1471 case 4: return OFFB_YMM4
;
1472 case 5: return OFFB_YMM5
;
1473 case 6: return OFFB_YMM6
;
1474 case 7: return OFFB_YMM7
;
1475 case 8: return OFFB_YMM8
;
1476 case 9: return OFFB_YMM9
;
1477 case 10: return OFFB_YMM10
;
1478 case 11: return OFFB_YMM11
;
1479 case 12: return OFFB_YMM12
;
1480 case 13: return OFFB_YMM13
;
1481 case 14: return OFFB_YMM14
;
1482 case 15: return OFFB_YMM15
;
1483 default: vpanic("ymmGuestRegOffset(amd64)");
1487 static Int
xmmGuestRegOffset ( UInt xmmreg
)
1489 /* Correct for little-endian host only. */
1490 vassert(host_endness
== VexEndnessLE
);
1491 return ymmGuestRegOffset( xmmreg
);
1494 /* Lanes of vector registers are always numbered from zero being the
1495 least significant lane (rightmost in the register). */
1497 static Int
xmmGuestRegLane16offset ( UInt xmmreg
, Int laneno
)
1499 /* Correct for little-endian host only. */
1500 vassert(host_endness
== VexEndnessLE
);
1501 vassert(laneno
>= 0 && laneno
< 8);
1502 return xmmGuestRegOffset( xmmreg
) + 2 * laneno
;
1505 static Int
xmmGuestRegLane32offset ( UInt xmmreg
, Int laneno
)
1507 /* Correct for little-endian host only. */
1508 vassert(host_endness
== VexEndnessLE
);
1509 vassert(laneno
>= 0 && laneno
< 4);
1510 return xmmGuestRegOffset( xmmreg
) + 4 * laneno
;
1513 static Int
xmmGuestRegLane64offset ( UInt xmmreg
, Int laneno
)
1515 /* Correct for little-endian host only. */
1516 vassert(host_endness
== VexEndnessLE
);
1517 vassert(laneno
>= 0 && laneno
< 2);
1518 return xmmGuestRegOffset( xmmreg
) + 8 * laneno
;
1521 static Int
ymmGuestRegLane128offset ( UInt ymmreg
, Int laneno
)
1523 /* Correct for little-endian host only. */
1524 vassert(host_endness
== VexEndnessLE
);
1525 vassert(laneno
>= 0 && laneno
< 2);
1526 return ymmGuestRegOffset( ymmreg
) + 16 * laneno
;
1529 static Int
ymmGuestRegLane64offset ( UInt ymmreg
, Int laneno
)
1531 /* Correct for little-endian host only. */
1532 vassert(host_endness
== VexEndnessLE
);
1533 vassert(laneno
>= 0 && laneno
< 4);
1534 return ymmGuestRegOffset( ymmreg
) + 8 * laneno
;
1537 static Int
ymmGuestRegLane32offset ( UInt ymmreg
, Int laneno
)
1539 /* Correct for little-endian host only. */
1540 vassert(host_endness
== VexEndnessLE
);
1541 vassert(laneno
>= 0 && laneno
< 8);
1542 return ymmGuestRegOffset( ymmreg
) + 4 * laneno
;
1545 static IRExpr
* getXMMReg ( UInt xmmreg
)
1547 return IRExpr_Get( xmmGuestRegOffset(xmmreg
), Ity_V128
);
1550 static IRExpr
* getXMMRegLane64 ( UInt xmmreg
, Int laneno
)
1552 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg
,laneno
), Ity_I64
);
1555 static IRExpr
* getXMMRegLane64F ( UInt xmmreg
, Int laneno
)
1557 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg
,laneno
), Ity_F64
);
1560 static IRExpr
* getXMMRegLane32 ( UInt xmmreg
, Int laneno
)
1562 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg
,laneno
), Ity_I32
);
1565 static IRExpr
* getXMMRegLane32F ( UInt xmmreg
, Int laneno
)
1567 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg
,laneno
), Ity_F32
);
1570 static IRExpr
* getXMMRegLane16 ( UInt xmmreg
, Int laneno
)
1572 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg
,laneno
), Ity_I16
);
1575 static void putXMMReg ( UInt xmmreg
, IRExpr
* e
)
1577 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_V128
);
1578 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg
), e
) );
1581 static void putXMMRegLane64 ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
1583 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I64
);
1584 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg
,laneno
), e
) );
1587 static void putXMMRegLane64F ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
1589 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_F64
);
1590 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg
,laneno
), e
) );
1593 static void putXMMRegLane32F ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
1595 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_F32
);
1596 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg
,laneno
), e
) );
1599 static void putXMMRegLane32 ( UInt xmmreg
, Int laneno
, IRExpr
* e
)
1601 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I32
);
1602 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg
,laneno
), e
) );
1605 static IRExpr
* getYMMReg ( UInt xmmreg
)
1607 return IRExpr_Get( ymmGuestRegOffset(xmmreg
), Ity_V256
);
1610 static IRExpr
* getYMMRegLane128 ( UInt ymmreg
, Int laneno
)
1612 return IRExpr_Get( ymmGuestRegLane128offset(ymmreg
,laneno
), Ity_V128
);
1615 static IRExpr
* getYMMRegLane64F ( UInt ymmreg
, Int laneno
)
1617 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg
,laneno
), Ity_F64
);
1620 static IRExpr
* getYMMRegLane64 ( UInt ymmreg
, Int laneno
)
1622 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg
,laneno
), Ity_I64
);
1625 static IRExpr
* getYMMRegLane32F ( UInt ymmreg
, Int laneno
)
1627 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg
,laneno
), Ity_F32
);
1630 static IRExpr
* getYMMRegLane32 ( UInt ymmreg
, Int laneno
)
1632 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg
,laneno
), Ity_I32
);
1635 static void putYMMReg ( UInt ymmreg
, IRExpr
* e
)
1637 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_V256
);
1638 stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg
), e
) );
1641 static void putYMMRegLane128 ( UInt ymmreg
, Int laneno
, IRExpr
* e
)
1643 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_V128
);
1644 stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg
,laneno
), e
) );
1647 static void putYMMRegLane64F ( UInt ymmreg
, Int laneno
, IRExpr
* e
)
1649 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_F64
);
1650 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg
,laneno
), e
) );
1653 static void putYMMRegLane64 ( UInt ymmreg
, Int laneno
, IRExpr
* e
)
1655 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I64
);
1656 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg
,laneno
), e
) );
1659 static void putYMMRegLane32F ( UInt ymmreg
, Int laneno
, IRExpr
* e
)
1661 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_F32
);
1662 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg
,laneno
), e
) );
1665 static void putYMMRegLane32 ( UInt ymmreg
, Int laneno
, IRExpr
* e
)
1667 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I32
);
1668 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg
,laneno
), e
) );
1671 static IRExpr
* mkV128 ( UShort mask
)
1673 return IRExpr_Const(IRConst_V128(mask
));
1676 /* Write the low half of a YMM reg and zero out the upper half. */
1677 static void putYMMRegLoAndZU ( UInt ymmreg
, IRExpr
* e
)
1679 putYMMRegLane128( ymmreg
, 0, e
);
1680 putYMMRegLane128( ymmreg
, 1, mkV128(0) );
1683 static IRExpr
* mkAnd1 ( IRExpr
* x
, IRExpr
* y
)
1685 vassert(typeOfIRExpr(irsb
->tyenv
,x
) == Ity_I1
);
1686 vassert(typeOfIRExpr(irsb
->tyenv
,y
) == Ity_I1
);
1687 return unop(Iop_64to1
,
1690 unop(Iop_1Uto64
,y
)));
1693 /* Generate a compare-and-swap operation, operating on memory at
1694 'addr'. The expected value is 'expVal' and the new value is
1695 'newVal'. If the operation fails, then transfer control (with a
1696 no-redir jump (XXX no -- see comment at top of this file)) to
1697 'restart_point', which is presumably the address of the guest
1698 instruction again -- retrying, essentially. */
1699 static void casLE ( IRExpr
* addr
, IRExpr
* expVal
, IRExpr
* newVal
,
1700 Addr64 restart_point
)
1703 IRType tyE
= typeOfIRExpr(irsb
->tyenv
, expVal
);
1704 IRType tyN
= typeOfIRExpr(irsb
->tyenv
, newVal
);
1705 IRTemp oldTmp
= newTemp(tyE
);
1706 IRTemp expTmp
= newTemp(tyE
);
1707 vassert(tyE
== tyN
);
1708 vassert(tyE
== Ity_I64
|| tyE
== Ity_I32
1709 || tyE
== Ity_I16
|| tyE
== Ity_I8
);
1710 assign(expTmp
, expVal
);
1711 cas
= mkIRCAS( IRTemp_INVALID
, oldTmp
, Iend_LE
, addr
,
1712 NULL
, mkexpr(expTmp
), NULL
, newVal
);
1713 stmt( IRStmt_CAS(cas
) );
1715 binop( mkSizedOp(tyE
,Iop_CasCmpNE8
),
1716 mkexpr(oldTmp
), mkexpr(expTmp
) ),
1717 Ijk_Boring
, /*Ijk_NoRedir*/
1718 IRConst_U64( restart_point
),
1724 /*------------------------------------------------------------*/
1725 /*--- Helpers for %rflags. ---*/
1726 /*------------------------------------------------------------*/
1728 /* -------------- Evaluating the flags-thunk. -------------- */
1730 /* Build IR to calculate all the eflags from stored
1731 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1733 static IRExpr
* mk_amd64g_calculate_rflags_all ( void )
1736 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP
, Ity_I64
),
1737 IRExpr_Get(OFFB_CC_DEP1
, Ity_I64
),
1738 IRExpr_Get(OFFB_CC_DEP2
, Ity_I64
),
1739 IRExpr_Get(OFFB_CC_NDEP
, Ity_I64
) );
1744 "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all
,
1747 /* Exclude OP and NDEP from definedness checking. We're only
1748 interested in DEP1 and DEP2. */
1749 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<3);
1753 /* Build IR to calculate some particular condition from stored
1754 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1756 static IRExpr
* mk_amd64g_calculate_condition ( AMD64Condcode cond
)
1759 = mkIRExprVec_5( mkU64(cond
),
1760 IRExpr_Get(OFFB_CC_OP
, Ity_I64
),
1761 IRExpr_Get(OFFB_CC_DEP1
, Ity_I64
),
1762 IRExpr_Get(OFFB_CC_DEP2
, Ity_I64
),
1763 IRExpr_Get(OFFB_CC_NDEP
, Ity_I64
) );
1768 "amd64g_calculate_condition", &amd64g_calculate_condition
,
1771 /* Exclude the requested condition, OP and NDEP from definedness
1772 checking. We're only interested in DEP1 and DEP2. */
1773 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<1) | (1<<4);
1774 return unop(Iop_64to1
, call
);
1777 /* Build IR to calculate just the carry flag from stored
1778 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I64. */
1779 static IRExpr
* mk_amd64g_calculate_rflags_c ( void )
1782 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP
, Ity_I64
),
1783 IRExpr_Get(OFFB_CC_DEP1
, Ity_I64
),
1784 IRExpr_Get(OFFB_CC_DEP2
, Ity_I64
),
1785 IRExpr_Get(OFFB_CC_NDEP
, Ity_I64
) );
1790 "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c
,
1793 /* Exclude OP and NDEP from definedness checking. We're only
1794 interested in DEP1 and DEP2. */
1795 call
->Iex
.CCall
.cee
->mcx_mask
= (1<<0) | (1<<3);
1800 /* -------------- Building the flags-thunk. -------------- */
1802 /* The machinery in this section builds the flag-thunk following a
1803 flag-setting operation. Hence the various setFlags_* functions.
1806 static Bool
isAddSub ( IROp op8
)
1808 return toBool(op8
== Iop_Add8
|| op8
== Iop_Sub8
);
1811 static Bool
isLogic ( IROp op8
)
1813 return toBool(op8
== Iop_And8
|| op8
== Iop_Or8
|| op8
== Iop_Xor8
);
1816 /* U-widen 1/8/16/32/64 bit int expr to 64. */
1817 static IRExpr
* widenUto64 ( IRExpr
* e
)
1819 switch (typeOfIRExpr(irsb
->tyenv
,e
)) {
1820 case Ity_I64
: return e
;
1821 case Ity_I32
: return unop(Iop_32Uto64
, e
);
1822 case Ity_I16
: return unop(Iop_16Uto64
, e
);
1823 case Ity_I8
: return unop(Iop_8Uto64
, e
);
1824 case Ity_I1
: return unop(Iop_1Uto64
, e
);
1825 default: vpanic("widenUto64");
1829 /* S-widen 8/16/32/64 bit int expr to 32. */
1830 static IRExpr
* widenSto64 ( IRExpr
* e
)
1832 switch (typeOfIRExpr(irsb
->tyenv
,e
)) {
1833 case Ity_I64
: return e
;
1834 case Ity_I32
: return unop(Iop_32Sto64
, e
);
1835 case Ity_I16
: return unop(Iop_16Sto64
, e
);
1836 case Ity_I8
: return unop(Iop_8Sto64
, e
);
1837 default: vpanic("widenSto64");
1841 /* Narrow 8/16/32/64 bit int expr to 8/16/32/64. Clearly only some
1842 of these combinations make sense. */
1843 static IRExpr
* narrowTo ( IRType dst_ty
, IRExpr
* e
)
1845 IRType src_ty
= typeOfIRExpr(irsb
->tyenv
,e
);
1846 if (src_ty
== dst_ty
)
1848 if (src_ty
== Ity_I32
&& dst_ty
== Ity_I16
)
1849 return unop(Iop_32to16
, e
);
1850 if (src_ty
== Ity_I32
&& dst_ty
== Ity_I8
)
1851 return unop(Iop_32to8
, e
);
1852 if (src_ty
== Ity_I64
&& dst_ty
== Ity_I32
)
1853 return unop(Iop_64to32
, e
);
1854 if (src_ty
== Ity_I64
&& dst_ty
== Ity_I16
)
1855 return unop(Iop_64to16
, e
);
1856 if (src_ty
== Ity_I64
&& dst_ty
== Ity_I8
)
1857 return unop(Iop_64to8
, e
);
1859 vex_printf("\nsrc, dst tys are: ");
1864 vpanic("narrowTo(amd64)");
1868 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
1869 auto-sized up to the real op. */
1872 void setFlags_DEP1_DEP2 ( IROp op8
, IRTemp dep1
, IRTemp dep2
, IRType ty
)
1876 case Ity_I8
: ccOp
= 0; break;
1877 case Ity_I16
: ccOp
= 1; break;
1878 case Ity_I32
: ccOp
= 2; break;
1879 case Ity_I64
: ccOp
= 3; break;
1880 default: vassert(0);
1883 case Iop_Add8
: ccOp
+= AMD64G_CC_OP_ADDB
; break;
1884 case Iop_Sub8
: ccOp
+= AMD64G_CC_OP_SUBB
; break;
1885 default: ppIROp(op8
);
1886 vpanic("setFlags_DEP1_DEP2(amd64)");
1888 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(ccOp
)) );
1889 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dep1
))) );
1890 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(dep2
))) );
1891 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
1895 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
1898 void setFlags_DEP1 ( IROp op8
, IRTemp dep1
, IRType ty
)
1902 case Ity_I8
: ccOp
= 0; break;
1903 case Ity_I16
: ccOp
= 1; break;
1904 case Ity_I32
: ccOp
= 2; break;
1905 case Ity_I64
: ccOp
= 3; break;
1906 default: vassert(0);
1911 case Iop_Xor8
: ccOp
+= AMD64G_CC_OP_LOGICB
; break;
1912 default: ppIROp(op8
);
1913 vpanic("setFlags_DEP1(amd64)");
1915 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(ccOp
)) );
1916 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dep1
))) );
1917 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0)) );
1918 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
1922 /* For shift operations, we put in the result and the undershifted
1923 result. Except if the shift amount is zero, the thunk is left
1926 static void setFlags_DEP1_DEP2_shift ( IROp op64
,
1934 case Ity_I8
: ccOp
= 0; break;
1935 case Ity_I16
: ccOp
= 1; break;
1936 case Ity_I32
: ccOp
= 2; break;
1937 case Ity_I64
: ccOp
= 3; break;
1938 default: vassert(0);
1943 /* Both kinds of right shifts are handled by the same thunk
1947 case Iop_Sar64
: ccOp
+= AMD64G_CC_OP_SHRB
; break;
1948 case Iop_Shl64
: ccOp
+= AMD64G_CC_OP_SHLB
; break;
1949 default: ppIROp(op64
);
1950 vpanic("setFlags_DEP1_DEP2_shift(amd64)");
1953 /* guard :: Ity_I8. We need to convert it to I1. */
1954 IRTemp guardB
= newTemp(Ity_I1
);
1955 assign( guardB
, binop(Iop_CmpNE8
, mkexpr(guard
), mkU8(0)) );
1957 /* DEP1 contains the result, DEP2 contains the undershifted value. */
1958 stmt( IRStmt_Put( OFFB_CC_OP
,
1959 IRExpr_ITE( mkexpr(guardB
),
1961 IRExpr_Get(OFFB_CC_OP
,Ity_I64
) ) ));
1962 stmt( IRStmt_Put( OFFB_CC_DEP1
,
1963 IRExpr_ITE( mkexpr(guardB
),
1964 widenUto64(mkexpr(res
)),
1965 IRExpr_Get(OFFB_CC_DEP1
,Ity_I64
) ) ));
1966 stmt( IRStmt_Put( OFFB_CC_DEP2
,
1967 IRExpr_ITE( mkexpr(guardB
),
1968 widenUto64(mkexpr(resUS
)),
1969 IRExpr_Get(OFFB_CC_DEP2
,Ity_I64
) ) ));
1970 stmt( IRStmt_Put( OFFB_CC_NDEP
,
1975 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1976 the former value of the carry flag, which unfortunately we have to
1979 static void setFlags_INC_DEC ( Bool inc
, IRTemp res
, IRType ty
)
1981 Int ccOp
= inc
? AMD64G_CC_OP_INCB
: AMD64G_CC_OP_DECB
;
1984 case Ity_I8
: ccOp
+= 0; break;
1985 case Ity_I16
: ccOp
+= 1; break;
1986 case Ity_I32
: ccOp
+= 2; break;
1987 case Ity_I64
: ccOp
+= 3; break;
1988 default: vassert(0);
1991 /* This has to come first, because calculating the C flag
1992 may require reading all four thunk fields. */
1993 stmt( IRStmt_Put( OFFB_CC_NDEP
, mk_amd64g_calculate_rflags_c()) );
1994 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(ccOp
)) );
1995 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(res
))) );
1996 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0)) );
2000 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
2004 void setFlags_MUL ( IRType ty
, IRTemp arg1
, IRTemp arg2
, ULong base_op
)
2008 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(base_op
+0) ) );
2011 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(base_op
+1) ) );
2014 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(base_op
+2) ) );
2017 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(base_op
+3) ) );
2020 vpanic("setFlags_MUL(amd64)");
2022 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(arg1
)) ));
2023 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(arg2
)) ));
2024 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
2028 /* -------------- Condition codes. -------------- */
2030 /* Condition codes, using the AMD encoding. */
2032 static const HChar
* name_AMD64Condcode ( AMD64Condcode cond
)
2035 case AMD64CondO
: return "o";
2036 case AMD64CondNO
: return "no";
2037 case AMD64CondB
: return "b";
2038 case AMD64CondNB
: return "ae"; /*"nb";*/
2039 case AMD64CondZ
: return "e"; /*"z";*/
2040 case AMD64CondNZ
: return "ne"; /*"nz";*/
2041 case AMD64CondBE
: return "be";
2042 case AMD64CondNBE
: return "a"; /*"nbe";*/
2043 case AMD64CondS
: return "s";
2044 case AMD64CondNS
: return "ns";
2045 case AMD64CondP
: return "p";
2046 case AMD64CondNP
: return "np";
2047 case AMD64CondL
: return "l";
2048 case AMD64CondNL
: return "ge"; /*"nl";*/
2049 case AMD64CondLE
: return "le";
2050 case AMD64CondNLE
: return "g"; /*"nle";*/
2051 case AMD64CondAlways
: return "ALWAYS";
2052 default: vpanic("name_AMD64Condcode");
2057 AMD64Condcode
positiveIse_AMD64Condcode ( AMD64Condcode cond
,
2058 /*OUT*/Bool
* needInvert
)
2060 vassert(cond
>= AMD64CondO
&& cond
<= AMD64CondNLE
);
2065 *needInvert
= False
;
2071 /* -------------- Helpers for ADD/SUB with carry. -------------- */
2073 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
2076 Optionally, generate a store for the 'tres' value. This can either
2077 be a normal store, or it can be a cas-with-possible-failure style
2080 if taddr is IRTemp_INVALID, then no store is generated.
2082 if taddr is not IRTemp_INVALID, then a store (using taddr as
2083 the address) is generated:
2085 if texpVal is IRTemp_INVALID then a normal store is
2086 generated, and restart_point must be zero (it is irrelevant).
2088 if texpVal is not IRTemp_INVALID then a cas-style store is
2089 generated. texpVal is the expected value, restart_point
2090 is the restart point if the store fails, and texpVal must
2091 have the same type as tres.
2094 static void helper_ADC ( Int sz
,
2095 IRTemp tres
, IRTemp ta1
, IRTemp ta2
,
2096 /* info about optional store: */
2097 IRTemp taddr
, IRTemp texpVal
, Addr64 restart_point
)
2100 IRType ty
= szToITy(sz
);
2101 IRTemp oldc
= newTemp(Ity_I64
);
2102 IRTemp oldcn
= newTemp(ty
);
2103 IROp plus
= mkSizedOp(ty
, Iop_Add8
);
2104 IROp
xor = mkSizedOp(ty
, Iop_Xor8
);
2106 vassert(typeOfIRTemp(irsb
->tyenv
, tres
) == ty
);
2109 case 8: thunkOp
= AMD64G_CC_OP_ADCQ
; break;
2110 case 4: thunkOp
= AMD64G_CC_OP_ADCL
; break;
2111 case 2: thunkOp
= AMD64G_CC_OP_ADCW
; break;
2112 case 1: thunkOp
= AMD64G_CC_OP_ADCB
; break;
2113 default: vassert(0);
2116 /* oldc = old carry flag, 0 or 1 */
2117 assign( oldc
, binop(Iop_And64
,
2118 mk_amd64g_calculate_rflags_c(),
2121 assign( oldcn
, narrowTo(ty
, mkexpr(oldc
)) );
2123 assign( tres
, binop(plus
,
2124 binop(plus
,mkexpr(ta1
),mkexpr(ta2
)),
2127 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2128 start of this function. */
2129 if (taddr
!= IRTemp_INVALID
) {
2130 if (texpVal
== IRTemp_INVALID
) {
2131 vassert(restart_point
== 0);
2132 storeLE( mkexpr(taddr
), mkexpr(tres
) );
2134 vassert(typeOfIRTemp(irsb
->tyenv
, texpVal
) == ty
);
2135 /* .. and hence 'texpVal' has the same type as 'tres'. */
2136 casLE( mkexpr(taddr
),
2137 mkexpr(texpVal
), mkexpr(tres
), restart_point
);
2141 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(thunkOp
) ) );
2142 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(ta1
)) ));
2143 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(binop(xor, mkexpr(ta2
),
2144 mkexpr(oldcn
)) )) );
2145 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkexpr(oldc
) ) );
2149 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
2150 appropriately. As with helper_ADC, possibly generate a store of
2151 the result -- see comments on helper_ADC for details.
2153 static void helper_SBB ( Int sz
,
2154 IRTemp tres
, IRTemp ta1
, IRTemp ta2
,
2155 /* info about optional store: */
2156 IRTemp taddr
, IRTemp texpVal
, Addr64 restart_point
)
2159 IRType ty
= szToITy(sz
);
2160 IRTemp oldc
= newTemp(Ity_I64
);
2161 IRTemp oldcn
= newTemp(ty
);
2162 IROp minus
= mkSizedOp(ty
, Iop_Sub8
);
2163 IROp
xor = mkSizedOp(ty
, Iop_Xor8
);
2165 vassert(typeOfIRTemp(irsb
->tyenv
, tres
) == ty
);
2168 case 8: thunkOp
= AMD64G_CC_OP_SBBQ
; break;
2169 case 4: thunkOp
= AMD64G_CC_OP_SBBL
; break;
2170 case 2: thunkOp
= AMD64G_CC_OP_SBBW
; break;
2171 case 1: thunkOp
= AMD64G_CC_OP_SBBB
; break;
2172 default: vassert(0);
2175 /* oldc = old carry flag, 0 or 1 */
2176 assign( oldc
, binop(Iop_And64
,
2177 mk_amd64g_calculate_rflags_c(),
2180 assign( oldcn
, narrowTo(ty
, mkexpr(oldc
)) );
2182 assign( tres
, binop(minus
,
2183 binop(minus
,mkexpr(ta1
),mkexpr(ta2
)),
2186 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2187 start of this function. */
2188 if (taddr
!= IRTemp_INVALID
) {
2189 if (texpVal
== IRTemp_INVALID
) {
2190 vassert(restart_point
== 0);
2191 storeLE( mkexpr(taddr
), mkexpr(tres
) );
2193 vassert(typeOfIRTemp(irsb
->tyenv
, texpVal
) == ty
);
2194 /* .. and hence 'texpVal' has the same type as 'tres'. */
2195 casLE( mkexpr(taddr
),
2196 mkexpr(texpVal
), mkexpr(tres
), restart_point
);
2200 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(thunkOp
) ) );
2201 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(ta1
) )) );
2202 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(binop(xor, mkexpr(ta2
),
2203 mkexpr(oldcn
)) )) );
2204 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkexpr(oldc
) ) );
2208 /* Given ta1, ta2 and tres, compute tres = ADCX(ta1,ta2) or tres = ADOX(ta1,ta2)
2209 and set flags appropriately.
2211 static void helper_ADCX_ADOX ( Bool isADCX
, Int sz
,
2212 IRTemp tres
, IRTemp ta1
, IRTemp ta2
)
2215 IRType ty
= szToITy(sz
);
2216 IRTemp oldflags
= newTemp(Ity_I64
);
2217 IRTemp oldOC
= newTemp(Ity_I64
); // old O or C flag
2218 IRTemp oldOCn
= newTemp(ty
); // old O or C flag, narrowed
2219 IROp plus
= mkSizedOp(ty
, Iop_Add8
);
2220 IROp
xor = mkSizedOp(ty
, Iop_Xor8
);
2222 vassert(typeOfIRTemp(irsb
->tyenv
, tres
) == ty
);
2225 case 8: thunkOp
= isADCX
? AMD64G_CC_OP_ADCX64
2226 : AMD64G_CC_OP_ADOX64
; break;
2227 case 4: thunkOp
= isADCX
? AMD64G_CC_OP_ADCX32
2228 : AMD64G_CC_OP_ADOX32
; break;
2229 default: vassert(0);
2232 assign( oldflags
, mk_amd64g_calculate_rflags_all() );
2234 /* oldOC = old overflow/carry flag, 0 or 1 */
2235 assign( oldOC
, binop(Iop_And64
,
2238 mkU8(isADCX
? AMD64G_CC_SHIFT_C
2239 : AMD64G_CC_SHIFT_O
)),
2242 assign( oldOCn
, narrowTo(ty
, mkexpr(oldOC
)) );
2244 assign( tres
, binop(plus
,
2245 binop(plus
,mkexpr(ta1
),mkexpr(ta2
)),
2248 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(thunkOp
) ) );
2249 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(ta1
)) ));
2250 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(binop(xor, mkexpr(ta2
),
2251 mkexpr(oldOCn
)) )) );
2252 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkexpr(oldflags
) ) );
2256 /* -------------- Helpers for disassembly printing. -------------- */
2258 static const HChar
* nameGrp1 ( Int opc_aux
)
2260 static const HChar
* grp1_names
[8]
2261 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
2262 if (opc_aux
< 0 || opc_aux
> 7) vpanic("nameGrp1(amd64)");
2263 return grp1_names
[opc_aux
];
2266 static const HChar
* nameGrp2 ( Int opc_aux
)
2268 static const HChar
* grp2_names
[8]
2269 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
2270 if (opc_aux
< 0 || opc_aux
> 7) vpanic("nameGrp2(amd64)");
2271 return grp2_names
[opc_aux
];
2274 static const HChar
* nameGrp4 ( Int opc_aux
)
2276 static const HChar
* grp4_names
[8]
2277 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
2278 if (opc_aux
< 0 || opc_aux
> 1) vpanic("nameGrp4(amd64)");
2279 return grp4_names
[opc_aux
];
2282 static const HChar
* nameGrp5 ( Int opc_aux
)
2284 static const HChar
* grp5_names
[8]
2285 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
2286 if (opc_aux
< 0 || opc_aux
> 6) vpanic("nameGrp5(amd64)");
2287 return grp5_names
[opc_aux
];
2290 static const HChar
* nameGrp8 ( Int opc_aux
)
2292 static const HChar
* grp8_names
[8]
2293 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
2294 if (opc_aux
< 4 || opc_aux
> 7) vpanic("nameGrp8(amd64)");
2295 return grp8_names
[opc_aux
];
2298 static const HChar
* nameSReg ( UInt sreg
)
2301 case R_ES
: return "%es";
2302 case R_CS
: return "%cs";
2303 case R_SS
: return "%ss";
2304 case R_DS
: return "%ds";
2305 case R_FS
: return "%fs";
2306 case R_GS
: return "%gs";
2307 default: vpanic("nameSReg(amd64)");
2311 static const HChar
* nameMMXReg ( Int mmxreg
)
2313 static const HChar
* mmx_names
[8]
2314 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
2315 if (mmxreg
< 0 || mmxreg
> 7) vpanic("nameMMXReg(amd64,guest)");
2316 return mmx_names
[mmxreg
];
2319 static const HChar
* nameXMMReg ( Int xmmreg
)
2321 static const HChar
* xmm_names
[16]
2322 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
2323 "%xmm4", "%xmm5", "%xmm6", "%xmm7",
2324 "%xmm8", "%xmm9", "%xmm10", "%xmm11",
2325 "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
2326 if (xmmreg
< 0 || xmmreg
> 15) vpanic("nameXMMReg(amd64)");
2327 return xmm_names
[xmmreg
];
2330 static const HChar
* nameMMXGran ( Int gran
)
2337 default: vpanic("nameMMXGran(amd64,guest)");
2341 static HChar
nameISize ( Int size
)
2348 default: vpanic("nameISize(amd64)");
2352 static const HChar
* nameYMMReg ( Int ymmreg
)
2354 static const HChar
* ymm_names
[16]
2355 = { "%ymm0", "%ymm1", "%ymm2", "%ymm3",
2356 "%ymm4", "%ymm5", "%ymm6", "%ymm7",
2357 "%ymm8", "%ymm9", "%ymm10", "%ymm11",
2358 "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
2359 if (ymmreg
< 0 || ymmreg
> 15) vpanic("nameYMMReg(amd64)");
2360 return ymm_names
[ymmreg
];
2364 /*------------------------------------------------------------*/
2365 /*--- JMP helpers ---*/
2366 /*------------------------------------------------------------*/
2368 static void jmp_lit( /*MOD*/DisResult
* dres
,
2369 IRJumpKind kind
, Addr64 d64
)
2371 vassert(dres
->whatNext
== Dis_Continue
);
2372 vassert(dres
->len
== 0);
2373 vassert(dres
->jk_StopHere
== Ijk_INVALID
);
2374 dres
->whatNext
= Dis_StopHere
;
2375 dres
->jk_StopHere
= kind
;
2376 stmt( IRStmt_Put( OFFB_RIP
, mkU64(d64
) ) );
2379 static void jmp_treg( /*MOD*/DisResult
* dres
,
2380 IRJumpKind kind
, IRTemp t
)
2382 vassert(dres
->whatNext
== Dis_Continue
);
2383 vassert(dres
->len
== 0);
2384 vassert(dres
->jk_StopHere
== Ijk_INVALID
);
2385 dres
->whatNext
= Dis_StopHere
;
2386 dres
->jk_StopHere
= kind
;
2387 stmt( IRStmt_Put( OFFB_RIP
, mkexpr(t
) ) );
2391 void jcc_01 ( /*MOD*/DisResult
* dres
,
2392 AMD64Condcode cond
, Addr64 d64_false
, Addr64 d64_true
)
2395 AMD64Condcode condPos
;
2396 vassert(dres
->whatNext
== Dis_Continue
);
2397 vassert(dres
->len
== 0);
2398 vassert(dres
->jk_StopHere
== Ijk_INVALID
);
2399 dres
->whatNext
= Dis_StopHere
;
2400 dres
->jk_StopHere
= Ijk_Boring
;
2401 condPos
= positiveIse_AMD64Condcode ( cond
, &invert
);
2403 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos
),
2405 IRConst_U64(d64_false
),
2407 stmt( IRStmt_Put( OFFB_RIP
, mkU64(d64_true
) ) );
2409 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos
),
2411 IRConst_U64(d64_true
),
2413 stmt( IRStmt_Put( OFFB_RIP
, mkU64(d64_false
) ) );
2417 /* Let new_rsp be the %rsp value after a call/return. Let nia be the
2418 guest address of the next instruction to be executed.
2420 This function generates an AbiHint to say that -128(%rsp)
2421 .. -1(%rsp) should now be regarded as uninitialised.
2424 void make_redzone_AbiHint ( const VexAbiInfo
* vbi
,
2425 IRTemp new_rsp
, IRTemp nia
, const HChar
* who
)
2427 Int szB
= vbi
->guest_stack_redzone_size
;
2430 /* A bit of a kludge. Currently the only AbI we've guested AMD64
2431 for is ELF. So just check it's the expected 128 value
2433 vassert(szB
== 128);
2435 if (0) vex_printf("AbiHint: %s\n", who
);
2436 vassert(typeOfIRTemp(irsb
->tyenv
, new_rsp
) == Ity_I64
);
2437 vassert(typeOfIRTemp(irsb
->tyenv
, nia
) == Ity_I64
);
2439 stmt( IRStmt_AbiHint(
2440 binop(Iop_Sub64
, mkexpr(new_rsp
), mkU64(szB
)),
2447 /*------------------------------------------------------------*/
2448 /*--- Disassembling addressing modes ---*/
2449 /*------------------------------------------------------------*/
2452 const HChar
* segRegTxt ( Prefix pfx
)
2454 if (pfx
& PFX_CS
) return "%cs:";
2455 if (pfx
& PFX_DS
) return "%ds:";
2456 if (pfx
& PFX_ES
) return "%es:";
2457 if (pfx
& PFX_FS
) return "%fs:";
2458 if (pfx
& PFX_GS
) return "%gs:";
2459 if (pfx
& PFX_SS
) return "%ss:";
2460 return ""; /* no override */
2464 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
2465 linear address by adding any required segment override as indicated
2466 by sorb, and also dealing with any address size override
2469 IRExpr
* handleAddrOverrides ( const VexAbiInfo
* vbi
,
2470 Prefix pfx
, IRExpr
* virtual )
2472 /* --- address size override --- */
2474 virtual = unop(Iop_32Uto64
, unop(Iop_64to32
, virtual));
2476 /* Note that the below are hacks that relies on the assumption
2477 that %fs or %gs are constant.
2478 Typically, %fs is always 0x63 on linux (in the main thread, it
2479 stays at value 0), %gs always 0x60 on Darwin, ... */
2480 /* --- segment overrides --- */
2482 if (vbi
->guest_amd64_assume_fs_is_const
) {
2483 /* return virtual + guest_FS_CONST. */
2484 virtual = binop(Iop_Add64
, virtual,
2485 IRExpr_Get(OFFB_FS_CONST
, Ity_I64
));
2487 unimplemented("amd64 %fs segment override");
2492 if (vbi
->guest_amd64_assume_gs_is_const
) {
2493 /* return virtual + guest_GS_CONST. */
2494 virtual = binop(Iop_Add64
, virtual,
2495 IRExpr_Get(OFFB_GS_CONST
, Ity_I64
));
2497 unimplemented("amd64 %gs segment override");
2501 /* cs, ds, es and ss are simply ignored in 64-bit mode. */
2508 //.. IRType hWordTy;
2509 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
2512 //.. /* the common case - no override */
2513 //.. return virtual;
2515 //.. switch (sorb) {
2516 //.. case 0x3E: sreg = R_DS; break;
2517 //.. case 0x26: sreg = R_ES; break;
2518 //.. case 0x64: sreg = R_FS; break;
2519 //.. case 0x65: sreg = R_GS; break;
2520 //.. default: vpanic("handleAddrOverrides(x86,guest)");
2523 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
2525 //.. seg_selector = newTemp(Ity_I32);
2526 //.. ldt_ptr = newTemp(hWordTy);
2527 //.. gdt_ptr = newTemp(hWordTy);
2528 //.. r64 = newTemp(Ity_I64);
2530 //.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
2531 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
2532 //.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
2535 //.. Call this to do the translation and limit checks:
2536 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2537 //.. UInt seg_selector, UInt virtual_addr )
2544 //.. "x86g_use_seg_selector",
2545 //.. &x86g_use_seg_selector,
2546 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
2547 //.. mkexpr(seg_selector), virtual)
2551 //.. /* If the high 32 of the result are non-zero, there was a
2552 //.. failure in address translation. In which case, make a
2557 //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
2559 //.. IRConst_U32( guest_eip_curr_instr )
2563 //.. /* otherwise, here's the translated result. */
2564 //.. return unop(Iop_64to32, mkexpr(r64));
2568 /* Generate IR to calculate an address indicated by a ModRM and
2569 following SIB bytes. The expression, and the number of bytes in
2570 the address mode, are returned (the latter in *len). Note that
2571 this fn should not be called if the R/M part of the address denotes
2572 a register instead of memory. If print_codegen is true, text of
2573 the addressing mode is placed in buf.
2575 The computed address is stored in a new tempreg, and the
2576 identity of the tempreg is returned.
2578 extra_bytes holds the number of bytes after the amode, as supplied
2579 by the caller. This is needed to make sense of %rip-relative
2580 addresses. Note that the value that *len is set to is only the
2581 length of the amode itself and does not include the value supplied
2585 static IRTemp
disAMode_copy2tmp ( IRExpr
* addr64
)
2587 IRTemp tmp
= newTemp(Ity_I64
);
2588 assign( tmp
, addr64
);
2593 IRTemp
disAMode ( /*OUT*/Int
* len
,
2594 const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
2595 /*OUT*/HChar
* buf
, Int extra_bytes
)
2597 UChar mod_reg_rm
= getUChar(delta
);
2601 vassert(extra_bytes
>= 0 && extra_bytes
< 10);
2603 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2604 jump table seems a bit excessive.
2606 mod_reg_rm
&= 0xC7; /* is now XX000YYY */
2607 mod_reg_rm
= toUChar(mod_reg_rm
| (mod_reg_rm
>> 3));
2608 /* is now XX0XXYYY */
2609 mod_reg_rm
&= 0x1F; /* is now 000XXYYY */
2610 switch (mod_reg_rm
) {
2612 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2613 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2615 case 0x00: case 0x01: case 0x02: case 0x03:
2616 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2617 { UChar rm
= toUChar(mod_reg_rm
& 7);
2618 DIS(buf
, "%s(%s)", segRegTxt(pfx
), nameIRegRexB(8,pfx
,rm
));
2620 return disAMode_copy2tmp(
2621 handleAddrOverrides(vbi
, pfx
, getIRegRexB(8,pfx
,rm
)));
2624 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2625 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2627 case 0x08: case 0x09: case 0x0A: case 0x0B:
2628 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2629 { UChar rm
= toUChar(mod_reg_rm
& 7);
2630 Long d
= getSDisp8(delta
);
2632 DIS(buf
, "%s(%s)", segRegTxt(pfx
), nameIRegRexB(8,pfx
,rm
));
2634 DIS(buf
, "%s%lld(%s)", segRegTxt(pfx
), d
, nameIRegRexB(8,pfx
,rm
));
2637 return disAMode_copy2tmp(
2638 handleAddrOverrides(vbi
, pfx
,
2639 binop(Iop_Add64
,getIRegRexB(8,pfx
,rm
),mkU64(d
))));
2642 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2643 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2645 case 0x10: case 0x11: case 0x12: case 0x13:
2646 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2647 { UChar rm
= toUChar(mod_reg_rm
& 7);
2648 Long d
= getSDisp32(delta
);
2649 DIS(buf
, "%s%lld(%s)", segRegTxt(pfx
), d
, nameIRegRexB(8,pfx
,rm
));
2651 return disAMode_copy2tmp(
2652 handleAddrOverrides(vbi
, pfx
,
2653 binop(Iop_Add64
,getIRegRexB(8,pfx
,rm
),mkU64(d
))));
2656 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2657 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */
2658 case 0x18: case 0x19: case 0x1A: case 0x1B:
2659 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2660 vpanic("disAMode(amd64): not an addr!");
2662 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set
2663 correctly at the start of handling each instruction. */
2665 { Long d
= getSDisp32(delta
);
2667 DIS(buf
, "%s%lld(%%rip)", segRegTxt(pfx
), d
);
2668 /* We need to know the next instruction's start address.
2669 Try and figure out what it is, record the guess, and ask
2670 the top-level driver logic (bbToIR_AMD64) to check we
2671 guessed right, after the instruction is completely
2673 guest_RIP_next_mustcheck
= True
;
2674 guest_RIP_next_assumed
= guest_RIP_bbstart
2675 + delta
+4 + extra_bytes
;
2676 return disAMode_copy2tmp(
2677 handleAddrOverrides(vbi
, pfx
,
2678 binop(Iop_Add64
, mkU64(guest_RIP_next_assumed
),
2683 /* SIB, with no displacement. Special cases:
2684 -- %rsp cannot act as an index value.
2685 If index_r indicates %rsp, zero is used for the index.
2686 -- when mod is zero and base indicates RBP or R13, base is
2687 instead a 32-bit sign-extended literal.
2688 It's all madness, I tell you. Extract %index, %base and
2689 scale from the SIB byte. The value denoted is then:
2690 | %index == %RSP && (%base == %RBP || %base == %R13)
2691 = d32 following SIB byte
2692 | %index == %RSP && !(%base == %RBP || %base == %R13)
2694 | %index != %RSP && (%base == %RBP || %base == %R13)
2695 = d32 following SIB byte + (%index << scale)
2696 | %index != %RSP && !(%base == %RBP || %base == %R13)
2697 = %base + (%index << scale)
2699 UChar sib
= getUChar(delta
);
2700 UChar scale
= toUChar((sib
>> 6) & 3);
2701 UChar index_r
= toUChar((sib
>> 3) & 7);
2702 UChar base_r
= toUChar(sib
& 7);
2703 /* correct since #(R13) == 8 + #(RBP) */
2704 Bool base_is_BPor13
= toBool(base_r
== R_RBP
);
2705 Bool index_is_SP
= toBool(index_r
== R_RSP
&& 0==getRexX(pfx
));
2708 if ((!index_is_SP
) && (!base_is_BPor13
)) {
2710 DIS(buf
, "%s(%s,%s)", segRegTxt(pfx
),
2711 nameIRegRexB(8,pfx
,base_r
),
2712 nameIReg64rexX(pfx
,index_r
));
2714 DIS(buf
, "%s(%s,%s,%d)", segRegTxt(pfx
),
2715 nameIRegRexB(8,pfx
,base_r
),
2716 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2721 handleAddrOverrides(vbi
, pfx
,
2723 getIRegRexB(8,pfx
,base_r
),
2724 binop(Iop_Shl64
, getIReg64rexX(pfx
,index_r
),
2728 if ((!index_is_SP
) && base_is_BPor13
) {
2729 Long d
= getSDisp32(delta
);
2730 DIS(buf
, "%s%lld(,%s,%d)", segRegTxt(pfx
), d
,
2731 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2735 handleAddrOverrides(vbi
, pfx
,
2737 binop(Iop_Shl64
, getIReg64rexX(pfx
,index_r
),
2742 if (index_is_SP
&& (!base_is_BPor13
)) {
2743 DIS(buf
, "%s(%s)", segRegTxt(pfx
), nameIRegRexB(8,pfx
,base_r
));
2745 return disAMode_copy2tmp(
2746 handleAddrOverrides(vbi
, pfx
, getIRegRexB(8,pfx
,base_r
)));
2749 if (index_is_SP
&& base_is_BPor13
) {
2750 Long d
= getSDisp32(delta
);
2751 DIS(buf
, "%s%lld", segRegTxt(pfx
), d
);
2753 return disAMode_copy2tmp(
2754 handleAddrOverrides(vbi
, pfx
, mkU64(d
)));
2760 /* SIB, with 8-bit displacement. Special cases:
2761 -- %esp cannot act as an index value.
2762 If index_r indicates %esp, zero is used for the index.
2767 = d8 + %base + (%index << scale)
2770 UChar sib
= getUChar(delta
);
2771 UChar scale
= toUChar((sib
>> 6) & 3);
2772 UChar index_r
= toUChar((sib
>> 3) & 7);
2773 UChar base_r
= toUChar(sib
& 7);
2774 Long d
= getSDisp8(delta
+1);
2776 if (index_r
== R_RSP
&& 0==getRexX(pfx
)) {
2777 DIS(buf
, "%s%lld(%s)", segRegTxt(pfx
),
2778 d
, nameIRegRexB(8,pfx
,base_r
));
2780 return disAMode_copy2tmp(
2781 handleAddrOverrides(vbi
, pfx
,
2782 binop(Iop_Add64
, getIRegRexB(8,pfx
,base_r
), mkU64(d
)) ));
2785 DIS(buf
, "%s%lld(%s,%s)", segRegTxt(pfx
), d
,
2786 nameIRegRexB(8,pfx
,base_r
),
2787 nameIReg64rexX(pfx
,index_r
));
2789 DIS(buf
, "%s%lld(%s,%s,%d)", segRegTxt(pfx
), d
,
2790 nameIRegRexB(8,pfx
,base_r
),
2791 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2796 handleAddrOverrides(vbi
, pfx
,
2799 getIRegRexB(8,pfx
,base_r
),
2801 getIReg64rexX(pfx
,index_r
), mkU8(scale
))),
2804 vassert(0); /*NOTREACHED*/
2807 /* SIB, with 32-bit displacement. Special cases:
2808 -- %rsp cannot act as an index value.
2809 If index_r indicates %rsp, zero is used for the index.
2814 = d32 + %base + (%index << scale)
2817 UChar sib
= getUChar(delta
);
2818 UChar scale
= toUChar((sib
>> 6) & 3);
2819 UChar index_r
= toUChar((sib
>> 3) & 7);
2820 UChar base_r
= toUChar(sib
& 7);
2821 Long d
= getSDisp32(delta
+1);
2823 if (index_r
== R_RSP
&& 0==getRexX(pfx
)) {
2824 DIS(buf
, "%s%lld(%s)", segRegTxt(pfx
),
2825 d
, nameIRegRexB(8,pfx
,base_r
));
2827 return disAMode_copy2tmp(
2828 handleAddrOverrides(vbi
, pfx
,
2829 binop(Iop_Add64
, getIRegRexB(8,pfx
,base_r
), mkU64(d
)) ));
2832 DIS(buf
, "%s%lld(%s,%s)", segRegTxt(pfx
), d
,
2833 nameIRegRexB(8,pfx
,base_r
),
2834 nameIReg64rexX(pfx
,index_r
));
2836 DIS(buf
, "%s%lld(%s,%s,%d)", segRegTxt(pfx
), d
,
2837 nameIRegRexB(8,pfx
,base_r
),
2838 nameIReg64rexX(pfx
,index_r
), 1<<scale
);
2843 handleAddrOverrides(vbi
, pfx
,
2846 getIRegRexB(8,pfx
,base_r
),
2848 getIReg64rexX(pfx
,index_r
), mkU8(scale
))),
2851 vassert(0); /*NOTREACHED*/
2855 vpanic("disAMode(amd64)");
2856 return 0; /*notreached*/
2861 /* Similarly for VSIB addressing. This returns just the addend,
2862 and fills in *rI and *vscale with the register number of the vector
2863 index and its multiplicand. */
2865 IRTemp
disAVSIBMode ( /*OUT*/Int
* len
,
2866 const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
2867 /*OUT*/HChar
* buf
, /*OUT*/UInt
* rI
,
2868 IRType ty
, /*OUT*/Int
* vscale
)
2870 UChar mod_reg_rm
= getUChar(delta
);
2871 const HChar
*vindex
;
2877 if ((mod_reg_rm
& 7) != 4 || epartIsReg(mod_reg_rm
))
2878 return IRTemp_INVALID
;
2880 UChar sib
= getUChar(delta
+1);
2881 UChar scale
= toUChar((sib
>> 6) & 3);
2882 UChar index_r
= toUChar((sib
>> 3) & 7);
2883 UChar base_r
= toUChar(sib
& 7);
2885 /* correct since #(R13) == 8 + #(RBP) */
2886 Bool base_is_BPor13
= toBool(base_r
== R_RBP
);
2890 *rI
= index_r
| (getRexX(pfx
) << 3);
2892 vindex
= nameXMMReg(*rI
);
2894 vindex
= nameYMMReg(*rI
);
2897 switch (mod_reg_rm
>> 6) {
2899 if (base_is_BPor13
) {
2900 d
= getSDisp32(delta
);
2903 DIS(buf
, "%s%lld(,%s)", segRegTxt(pfx
), d
, vindex
);
2905 DIS(buf
, "%s%lld(,%s,%d)", segRegTxt(pfx
), d
, vindex
, 1<<scale
);
2907 return disAMode_copy2tmp( mkU64(d
) );
2910 DIS(buf
, "%s(%s,%s)", segRegTxt(pfx
),
2911 nameIRegRexB(8,pfx
,base_r
), vindex
);
2913 DIS(buf
, "%s(%s,%s,%d)", segRegTxt(pfx
),
2914 nameIRegRexB(8,pfx
,base_r
), vindex
, 1<<scale
);
2919 d
= getSDisp8(delta
);
2923 d
= getSDisp32(delta
);
2927 DIS(buf
, "%s%lld(%s,%s)", segRegTxt(pfx
), d
,
2928 nameIRegRexB(8,pfx
,base_r
), vindex
);
2930 DIS(buf
, "%s%lld(%s,%s,%d)", segRegTxt(pfx
), d
,
2931 nameIRegRexB(8,pfx
,base_r
), vindex
, 1<<scale
);
2937 return disAMode_copy2tmp( getIRegRexB(8,pfx
,base_r
) );
2938 return disAMode_copy2tmp( binop(Iop_Add64
, getIRegRexB(8,pfx
,base_r
),
2943 /* Figure out the number of (insn-stream) bytes constituting the amode
2944 beginning at delta. Is useful for getting hold of literals beyond
2945 the end of the amode before it has been disassembled. */
2947 static UInt
lengthAMode ( Prefix pfx
, Long delta
)
2949 UChar mod_reg_rm
= getUChar(delta
);
2952 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2953 jump table seems a bit excessive.
2955 mod_reg_rm
&= 0xC7; /* is now XX000YYY */
2956 mod_reg_rm
= toUChar(mod_reg_rm
| (mod_reg_rm
>> 3));
2957 /* is now XX0XXYYY */
2958 mod_reg_rm
&= 0x1F; /* is now 000XXYYY */
2959 switch (mod_reg_rm
) {
2961 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2962 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2964 case 0x00: case 0x01: case 0x02: case 0x03:
2965 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2968 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2969 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2971 case 0x08: case 0x09: case 0x0A: case 0x0B:
2972 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2975 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2976 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2978 case 0x10: case 0x11: case 0x12: case 0x13:
2979 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2982 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2983 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */
2984 /* Not an address, but still handled. */
2985 case 0x18: case 0x19: case 0x1A: case 0x1B:
2986 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2994 /* SIB, with no displacement. */
2995 UChar sib
= getUChar(delta
);
2996 UChar base_r
= toUChar(sib
& 7);
2997 /* correct since #(R13) == 8 + #(RBP) */
2998 Bool base_is_BPor13
= toBool(base_r
== R_RBP
);
3000 if (base_is_BPor13
) {
3007 /* SIB, with 8-bit displacement. */
3011 /* SIB, with 32-bit displacement. */
3016 vpanic("lengthAMode(amd64)");
3017 return 0; /*notreached*/
3022 /*------------------------------------------------------------*/
3023 /*--- Disassembling common idioms ---*/
3024 /*------------------------------------------------------------*/
/* Which incoming flag (if any) participates in a dis_op2_* operation:
   none, carry (ADC/SBB), or the ADCX/ADOX carry/overflow variants. */
typedef
   enum { WithFlagNone=2, WithFlagCarry, WithFlagCarryX, WithFlagOverX }
   WithFlag;
3030 /* Handle binary integer instructions of the form
3033 Is passed the a ptr to the modRM byte, the actual operation, and the
3034 data size. Returns the address advanced completely over this
3037 E(src) is reg-or-mem
3040 If E is reg, --> GET %G, tmp
3044 If E is mem and OP is not reversible,
3045 --> (getAddr E) -> tmpa
3051 If E is mem and OP is reversible
3052 --> (getAddr E) -> tmpa
3058 ULong
dis_op2_E_G ( const VexAbiInfo
* vbi
,
3065 const HChar
* t_amd64opc
)
3069 IRType ty
= szToITy(size
);
3070 IRTemp dst1
= newTemp(ty
);
3071 IRTemp src
= newTemp(ty
);
3072 IRTemp dst0
= newTemp(ty
);
3073 UChar rm
= getUChar(delta0
);
3074 IRTemp addr
= IRTemp_INVALID
;
3076 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
3080 case WithFlagNone
: case WithFlagCarry
:
3081 case WithFlagCarryX
: case WithFlagOverX
:
3089 vassert(flag
== WithFlagNone
|| flag
== WithFlagCarry
);
3090 if (flag
== WithFlagCarry
) vassert(keep
);
3093 vassert(flag
== WithFlagNone
);
3095 case Iop_Or8
: case Iop_Xor8
:
3096 vassert(flag
== WithFlagNone
);
3103 if (epartIsReg(rm
)) {
3104 /* Specially handle XOR reg,reg, because that doesn't really
3105 depend on reg, and doing the obvious thing potentially
3106 generates a spurious value check failure due to the bogus
3107 dependency. Ditto SUB/SBB reg,reg. */
3108 if ((op8
== Iop_Xor8
|| ((op8
== Iop_Sub8
) && keep
))
3109 && offsetIRegG(size
,pfx
,rm
) == offsetIRegE(size
,pfx
,rm
)) {
3110 putIRegG(size
,pfx
,rm
, mkU(ty
,0));
3113 assign( dst0
, getIRegG(size
,pfx
,rm
) );
3114 assign( src
, getIRegE(size
,pfx
,rm
) );
3116 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3117 helper_ADC( size
, dst1
, dst0
, src
,
3118 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3119 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3121 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3122 helper_SBB( size
, dst1
, dst0
, src
,
3123 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3124 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3126 if (op8
== Iop_Add8
&& flag
== WithFlagCarryX
) {
3127 helper_ADCX_ADOX( True
/*isADCX*/, size
, dst1
, dst0
, src
);
3128 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3130 if (op8
== Iop_Add8
&& flag
== WithFlagOverX
) {
3131 helper_ADCX_ADOX( False
/*!isADCX*/, size
, dst1
, dst0
, src
);
3132 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3134 assign( dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3136 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3138 setFlags_DEP1(op8
, dst1
, ty
);
3140 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3143 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3144 nameIRegE(size
,pfx
,rm
),
3145 nameIRegG(size
,pfx
,rm
));
3148 /* E refers to memory */
3149 addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3150 assign( dst0
, getIRegG(size
,pfx
,rm
) );
3151 assign( src
, loadLE(szToITy(size
), mkexpr(addr
)) );
3153 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3154 helper_ADC( size
, dst1
, dst0
, src
,
3155 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3156 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3158 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3159 helper_SBB( size
, dst1
, dst0
, src
,
3160 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3161 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3163 if (op8
== Iop_Add8
&& flag
== WithFlagCarryX
) {
3164 helper_ADCX_ADOX( True
/*isADCX*/, size
, dst1
, dst0
, src
);
3165 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3167 if (op8
== Iop_Add8
&& flag
== WithFlagOverX
) {
3168 helper_ADCX_ADOX( False
/*!isADCX*/, size
, dst1
, dst0
, src
);
3169 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3171 assign( dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3173 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3175 setFlags_DEP1(op8
, dst1
, ty
);
3177 putIRegG(size
, pfx
, rm
, mkexpr(dst1
));
3180 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3181 dis_buf
, nameIRegG(size
, pfx
, rm
));
3188 /* Handle binary integer instructions of the form
3191 Is passed the a ptr to the modRM byte, the actual operation, and the
3192 data size. Returns the address advanced completely over this
3196 E(dst) is reg-or-mem
3198 If E is reg, --> GET %E, tmp
3202 If E is mem, --> (getAddr E) -> tmpa
3208 ULong
dis_op2_G_E ( const VexAbiInfo
* vbi
,
3215 const HChar
* t_amd64opc
)
3219 IRType ty
= szToITy(size
);
3220 IRTemp dst1
= newTemp(ty
);
3221 IRTemp src
= newTemp(ty
);
3222 IRTemp dst0
= newTemp(ty
);
3223 UChar rm
= getUChar(delta0
);
3224 IRTemp addr
= IRTemp_INVALID
;
3226 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
3229 vassert(flag
== WithFlagNone
|| flag
== WithFlagCarry
);
3233 vassert(flag
== WithFlagNone
|| flag
== WithFlagCarry
);
3234 if (flag
== WithFlagCarry
) vassert(keep
);
3236 case Iop_And8
: case Iop_Or8
: case Iop_Xor8
:
3237 vassert(flag
== WithFlagNone
);
3244 /* flag != WithFlagNone is only allowed for Add and Sub and indicates the
3245 intended operation is add-with-carry or subtract-with-borrow. */
3247 if (epartIsReg(rm
)) {
3248 /* Specially handle XOR reg,reg, because that doesn't really
3249 depend on reg, and doing the obvious thing potentially
3250 generates a spurious value check failure due to the bogus
3251 dependency. Ditto SUB/SBB reg,reg. */
3252 if ((op8
== Iop_Xor8
|| ((op8
== Iop_Sub8
) && keep
))
3253 && offsetIRegG(size
,pfx
,rm
) == offsetIRegE(size
,pfx
,rm
)) {
3254 putIRegE(size
,pfx
,rm
, mkU(ty
,0));
3257 assign(dst0
, getIRegE(size
,pfx
,rm
));
3258 assign(src
, getIRegG(size
,pfx
,rm
));
3260 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3261 helper_ADC( size
, dst1
, dst0
, src
,
3262 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3263 putIRegE(size
, pfx
, rm
, mkexpr(dst1
));
3265 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3266 helper_SBB( size
, dst1
, dst0
, src
,
3267 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3268 putIRegE(size
, pfx
, rm
, mkexpr(dst1
));
3270 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3272 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3274 setFlags_DEP1(op8
, dst1
, ty
);
3276 putIRegE(size
, pfx
, rm
, mkexpr(dst1
));
3279 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3280 nameIRegG(size
,pfx
,rm
),
3281 nameIRegE(size
,pfx
,rm
));
3285 /* E refers to memory */
3287 addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3288 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
3289 assign(src
, getIRegG(size
,pfx
,rm
));
3291 if (op8
== Iop_Add8
&& flag
== WithFlagCarry
) {
3292 if (haveLOCK(pfx
)) {
3293 /* cas-style store */
3294 helper_ADC( size
, dst1
, dst0
, src
,
3295 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3298 helper_ADC( size
, dst1
, dst0
, src
,
3299 /*store*/addr
, IRTemp_INVALID
, 0 );
3302 if (op8
== Iop_Sub8
&& flag
== WithFlagCarry
) {
3303 if (haveLOCK(pfx
)) {
3304 /* cas-style store */
3305 helper_SBB( size
, dst1
, dst0
, src
,
3306 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3309 helper_SBB( size
, dst1
, dst0
, src
,
3310 /*store*/addr
, IRTemp_INVALID
, 0 );
3313 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3315 if (haveLOCK(pfx
)) {
3316 if (0) vex_printf("locked case\n" );
3317 casLE( mkexpr(addr
),
3318 mkexpr(dst0
)/*expval*/,
3319 mkexpr(dst1
)/*newval*/, guest_RIP_curr_instr
);
3321 if (0) vex_printf("nonlocked case\n");
3322 storeLE(mkexpr(addr
), mkexpr(dst1
));
3326 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3328 setFlags_DEP1(op8
, dst1
, ty
);
3331 DIP("%s%c %s,%s\n", t_amd64opc
, nameISize(size
),
3332 nameIRegG(size
,pfx
,rm
), dis_buf
);
3338 /* Handle move instructions of the form
3341 Is passed the a ptr to the modRM byte, and the data size. Returns
3342 the address advanced completely over this instruction.
3344 E(src) is reg-or-mem
3347 If E is reg, --> GET %E, tmpv
3350 If E is mem --> (getAddr E) -> tmpa
3355 ULong
dis_mov_E_G ( const VexAbiInfo
* vbi
,
3361 UChar rm
= getUChar(delta0
);
3364 if (epartIsReg(rm
)) {
3365 putIRegG(size
, pfx
, rm
, getIRegE(size
, pfx
, rm
));
3366 DIP("mov%c %s,%s\n", nameISize(size
),
3367 nameIRegE(size
,pfx
,rm
),
3368 nameIRegG(size
,pfx
,rm
));
3372 /* E refers to memory */
3374 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3375 putIRegG(size
, pfx
, rm
, loadLE(szToITy(size
), mkexpr(addr
)));
3376 DIP("mov%c %s,%s\n", nameISize(size
),
3378 nameIRegG(size
,pfx
,rm
));
3384 /* Handle move instructions of the form
3387 Is passed the a ptr to the modRM byte, and the data size. Returns
3388 the address advanced completely over this instruction.
3389 We have to decide here whether F2 or F3 are acceptable. F2 never is.
3392 E(dst) is reg-or-mem
3394 If E is reg, --> GET %G, tmp
3397 If E is mem, --> (getAddr E) -> tmpa
3402 ULong
dis_mov_G_E ( const VexAbiInfo
* vbi
,
3409 UChar rm
= getUChar(delta0
);
3414 if (epartIsReg(rm
)) {
3415 if (haveF2orF3(pfx
)) { *ok
= False
; return delta0
; }
3416 putIRegE(size
, pfx
, rm
, getIRegG(size
, pfx
, rm
));
3417 DIP("mov%c %s,%s\n", nameISize(size
),
3418 nameIRegG(size
,pfx
,rm
),
3419 nameIRegE(size
,pfx
,rm
));
3423 /* E refers to memory */
3425 if (haveF2(pfx
)) { *ok
= False
; return delta0
; }
3426 /* F3(XRELEASE) is acceptable, though. */
3427 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
3428 storeLE( mkexpr(addr
), getIRegG(size
, pfx
, rm
) );
3429 DIP("mov%c %s,%s\n", nameISize(size
),
3430 nameIRegG(size
,pfx
,rm
),
3437 /* op $immediate, AL/AX/EAX/RAX. */
3439 ULong
dis_op_imm_A ( Int size
,
3444 const HChar
* t_amd64opc
)
3446 Int size4
= imin(size
,4);
3447 IRType ty
= szToITy(size
);
3448 IRTemp dst0
= newTemp(ty
);
3449 IRTemp src
= newTemp(ty
);
3450 IRTemp dst1
= newTemp(ty
);
3451 Long lit
= getSDisp(size4
,delta
);
3452 assign(dst0
, getIRegRAX(size
));
3453 assign(src
, mkU(ty
,lit
& mkSizeMask(size
)));
3455 if (isAddSub(op8
) && !carrying
) {
3456 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3457 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3462 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)) );
3463 setFlags_DEP1(op8
, dst1
, ty
);
3466 if (op8
== Iop_Add8
&& carrying
) {
3467 helper_ADC( size
, dst1
, dst0
, src
,
3468 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3471 if (op8
== Iop_Sub8
&& carrying
) {
3472 helper_SBB( size
, dst1
, dst0
, src
,
3473 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3476 vpanic("dis_op_imm_A(amd64,guest)");
3479 putIRegRAX(size
, mkexpr(dst1
));
3481 DIP("%s%c $%lld, %s\n", t_amd64opc
, nameISize(size
),
3482 lit
, nameIRegRAX(size
));
3487 /* Sign- and Zero-extending moves. */
3489 ULong
dis_movx_E_G ( const VexAbiInfo
* vbi
,
3491 Long delta
, Int szs
, Int szd
, Bool sign_extend
)
3493 UChar rm
= getUChar(delta
);
3494 if (epartIsReg(rm
)) {
3495 putIRegG(szd
, pfx
, rm
,
3497 szs
,szd
,sign_extend
,
3498 getIRegE(szs
,pfx
,rm
)));
3499 DIP("mov%c%c%c %s,%s\n", sign_extend
? 's' : 'z',
3502 nameIRegE(szs
,pfx
,rm
),
3503 nameIRegG(szd
,pfx
,rm
));
3507 /* E refers to memory */
3511 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
3512 putIRegG(szd
, pfx
, rm
,
3514 szs
,szd
,sign_extend
,
3515 loadLE(szToITy(szs
),mkexpr(addr
))));
3516 DIP("mov%c%c%c %s,%s\n", sign_extend
? 's' : 'z',
3520 nameIRegG(szd
,pfx
,rm
));
3526 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
3527 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
3529 void codegen_div ( Int sz
, IRTemp t
, Bool signed_divide
)
3531 /* special-case the 64-bit case */
3533 IROp op
= signed_divide
? Iop_DivModS128to64
3534 : Iop_DivModU128to64
;
3535 IRTemp src128
= newTemp(Ity_I128
);
3536 IRTemp dst128
= newTemp(Ity_I128
);
3537 assign( src128
, binop(Iop_64HLto128
,
3539 getIReg64(R_RAX
)) );
3540 assign( dst128
, binop(op
, mkexpr(src128
), mkexpr(t
)) );
3541 putIReg64( R_RAX
, unop(Iop_128to64
,mkexpr(dst128
)) );
3542 putIReg64( R_RDX
, unop(Iop_128HIto64
,mkexpr(dst128
)) );
3544 IROp op
= signed_divide
? Iop_DivModS64to32
3545 : Iop_DivModU64to32
;
3546 IRTemp src64
= newTemp(Ity_I64
);
3547 IRTemp dst64
= newTemp(Ity_I64
);
3551 binop(Iop_32HLto64
, getIRegRDX(4), getIRegRAX(4)) );
3553 binop(op
, mkexpr(src64
), mkexpr(t
)) );
3554 putIRegRAX( 4, unop(Iop_64to32
,mkexpr(dst64
)) );
3555 putIRegRDX( 4, unop(Iop_64HIto32
,mkexpr(dst64
)) );
3558 IROp widen3264
= signed_divide
? Iop_32Sto64
: Iop_32Uto64
;
3559 IROp widen1632
= signed_divide
? Iop_16Sto32
: Iop_16Uto32
;
3560 assign( src64
, unop(widen3264
,
3564 assign( dst64
, binop(op
, mkexpr(src64
), unop(widen1632
,mkexpr(t
))) );
3565 putIRegRAX( 2, unop(Iop_32to16
,unop(Iop_64to32
,mkexpr(dst64
))) );
3566 putIRegRDX( 2, unop(Iop_32to16
,unop(Iop_64HIto32
,mkexpr(dst64
))) );
3570 IROp widen3264
= signed_divide
? Iop_32Sto64
: Iop_32Uto64
;
3571 IROp widen1632
= signed_divide
? Iop_16Sto32
: Iop_16Uto32
;
3572 IROp widen816
= signed_divide
? Iop_8Sto16
: Iop_8Uto16
;
3573 assign( src64
, unop(widen3264
,
3574 unop(widen1632
, getIRegRAX(2))) );
3576 binop(op
, mkexpr(src64
),
3577 unop(widen1632
, unop(widen816
, mkexpr(t
)))) );
3578 putIRegRAX( 1, unop(Iop_16to8
,
3580 unop(Iop_64to32
,mkexpr(dst64
)))) );
3581 putIRegAH( unop(Iop_16to8
,
3583 unop(Iop_64HIto32
,mkexpr(dst64
)))) );
3587 vpanic("codegen_div(amd64)");
3593 ULong
dis_Grp1 ( const VexAbiInfo
* vbi
,
3595 Long delta
, UChar modrm
,
3596 Int am_sz
, Int d_sz
, Int sz
, Long d64
)
3600 IRType ty
= szToITy(sz
);
3601 IRTemp dst1
= newTemp(ty
);
3602 IRTemp src
= newTemp(ty
);
3603 IRTemp dst0
= newTemp(ty
);
3604 IRTemp addr
= IRTemp_INVALID
;
3605 IROp op8
= Iop_INVALID
;
3606 ULong mask
= mkSizeMask(sz
);
3608 switch (gregLO3ofRM(modrm
)) {
3609 case 0: op8
= Iop_Add8
; break; case 1: op8
= Iop_Or8
; break;
3610 case 2: break; // ADC
3611 case 3: break; // SBB
3612 case 4: op8
= Iop_And8
; break; case 5: op8
= Iop_Sub8
; break;
3613 case 6: op8
= Iop_Xor8
; break; case 7: op8
= Iop_Sub8
; break;
3615 default: vpanic("dis_Grp1(amd64): unhandled case");
3618 if (epartIsReg(modrm
)) {
3619 vassert(am_sz
== 1);
3621 assign(dst0
, getIRegE(sz
,pfx
,modrm
));
3622 assign(src
, mkU(ty
,d64
& mask
));
3624 if (gregLO3ofRM(modrm
) == 2 /* ADC */) {
3625 helper_ADC( sz
, dst1
, dst0
, src
,
3626 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3628 if (gregLO3ofRM(modrm
) == 3 /* SBB */) {
3629 helper_SBB( sz
, dst1
, dst0
, src
,
3630 /*no store*/IRTemp_INVALID
, IRTemp_INVALID
, 0 );
3632 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3634 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3636 setFlags_DEP1(op8
, dst1
, ty
);
3639 if (gregLO3ofRM(modrm
) < 7)
3640 putIRegE(sz
, pfx
, modrm
, mkexpr(dst1
));
3642 delta
+= (am_sz
+ d_sz
);
3643 DIP("%s%c $%lld, %s\n",
3644 nameGrp1(gregLO3ofRM(modrm
)), nameISize(sz
), d64
,
3645 nameIRegE(sz
,pfx
,modrm
));
3647 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, /*xtra*/d_sz
);
3649 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
3650 assign(src
, mkU(ty
,d64
& mask
));
3652 if (gregLO3ofRM(modrm
) == 2 /* ADC */) {
3653 if (haveLOCK(pfx
)) {
3654 /* cas-style store */
3655 helper_ADC( sz
, dst1
, dst0
, src
,
3656 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3659 helper_ADC( sz
, dst1
, dst0
, src
,
3660 /*store*/addr
, IRTemp_INVALID
, 0 );
3663 if (gregLO3ofRM(modrm
) == 3 /* SBB */) {
3664 if (haveLOCK(pfx
)) {
3665 /* cas-style store */
3666 helper_SBB( sz
, dst1
, dst0
, src
,
3667 /*store*/addr
, dst0
/*expVal*/, guest_RIP_curr_instr
);
3670 helper_SBB( sz
, dst1
, dst0
, src
,
3671 /*store*/addr
, IRTemp_INVALID
, 0 );
3674 assign(dst1
, binop(mkSizedOp(ty
,op8
), mkexpr(dst0
), mkexpr(src
)));
3675 if (gregLO3ofRM(modrm
) < 7) {
3676 if (haveLOCK(pfx
)) {
3677 casLE( mkexpr(addr
), mkexpr(dst0
)/*expVal*/,
3678 mkexpr(dst1
)/*newVal*/,
3679 guest_RIP_curr_instr
);
3681 storeLE(mkexpr(addr
), mkexpr(dst1
));
3685 setFlags_DEP1_DEP2(op8
, dst0
, src
, ty
);
3687 setFlags_DEP1(op8
, dst1
, ty
);
3690 delta
+= (len
+d_sz
);
3691 DIP("%s%c $%lld, %s\n",
3692 nameGrp1(gregLO3ofRM(modrm
)), nameISize(sz
),
3699 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
3703 ULong
dis_Grp2 ( const VexAbiInfo
* vbi
,
3705 Long delta
, UChar modrm
,
3706 Int am_sz
, Int d_sz
, Int sz
, IRExpr
* shift_expr
,
3707 const HChar
* shift_expr_txt
, Bool
* decode_OK
)
3709 /* delta on entry points at the modrm byte. */
3712 Bool isShift
, isRotate
, isRotateC
;
3713 IRType ty
= szToITy(sz
);
3714 IRTemp dst0
= newTemp(ty
);
3715 IRTemp dst1
= newTemp(ty
);
3716 IRTemp addr
= IRTemp_INVALID
;
3720 vassert(sz
== 1 || sz
== 2 || sz
== 4 || sz
== 8);
3722 /* Put value to shift/rotate in dst0. */
3723 if (epartIsReg(modrm
)) {
3724 assign(dst0
, getIRegE(sz
, pfx
, modrm
));
3725 delta
+= (am_sz
+ d_sz
);
3727 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, /*xtra*/d_sz
);
3728 assign(dst0
, loadLE(ty
,mkexpr(addr
)));
3729 delta
+= len
+ d_sz
;
3733 switch (gregLO3ofRM(modrm
)) { case 4: case 5: case 6: case 7: isShift
= True
; }
3736 switch (gregLO3ofRM(modrm
)) { case 0: case 1: isRotate
= True
; }
3739 switch (gregLO3ofRM(modrm
)) { case 2: case 3: isRotateC
= True
; }
3741 if (!isShift
&& !isRotate
&& !isRotateC
) {
3743 vpanic("dis_Grp2(Reg): unhandled case(amd64)");
3747 /* Call a helper; this insn is so ridiculous it does not deserve
3748 better. One problem is, the helper has to calculate both the
3749 new value and the new flags. This is more than 64 bits, and
3750 there is no way to return more than 64 bits from the helper.
3751 Hence the crude and obvious solution is to call it twice,
3752 using the sign of the sz field to indicate whether it is the
3753 value or rflags result we want.
3755 Bool left
= toBool(gregLO3ofRM(modrm
) == 2);
3757 IRExpr
** argsRFLAGS
;
3759 IRTemp new_value
= newTemp(Ity_I64
);
3760 IRTemp new_rflags
= newTemp(Ity_I64
);
3761 IRTemp old_rflags
= newTemp(Ity_I64
);
3763 assign( old_rflags
, widenUto64(mk_amd64g_calculate_rflags_all()) );
3766 = mkIRExprVec_4( widenUto64(mkexpr(dst0
)), /* thing to rotate */
3767 widenUto64(shift_expr
), /* rotate amount */
3774 left
? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3775 left
? &amd64g_calculate_RCL
: &amd64g_calculate_RCR
,
3781 = mkIRExprVec_4( widenUto64(mkexpr(dst0
)), /* thing to rotate */
3782 widenUto64(shift_expr
), /* rotate amount */
3789 left
? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3790 left
? &amd64g_calculate_RCL
: &amd64g_calculate_RCR
,
3795 assign( dst1
, narrowTo(ty
, mkexpr(new_value
)) );
3796 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
3797 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(new_rflags
) ));
3798 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
3799 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
3805 IRTemp pre64
= newTemp(Ity_I64
);
3806 IRTemp res64
= newTemp(Ity_I64
);
3807 IRTemp res64ss
= newTemp(Ity_I64
);
3808 IRTemp shift_amt
= newTemp(Ity_I8
);
3809 UChar mask
= toUChar(sz
==8 ? 63 : 31);
3812 switch (gregLO3ofRM(modrm
)) {
3813 case 4: op64
= Iop_Shl64
; break;
3814 case 5: op64
= Iop_Shr64
; break;
3815 case 6: op64
= Iop_Shl64
; break;
3816 case 7: op64
= Iop_Sar64
; break;
3818 default: vpanic("dis_Grp2:shift"); break;
3821 /* Widen the value to be shifted to 64 bits, do the shift, and
3822 narrow back down. This seems surprisingly long-winded, but
3823 unfortunately the AMD semantics requires that 8/16/32-bit
3824 shifts give defined results for shift values all the way up
3825 to 32, and this seems the simplest way to do it. It has the
3826 advantage that the only IR level shifts generated are of 64
3827 bit values, and the shift amount is guaranteed to be in the
3828 range 0 .. 63, thereby observing the IR semantics requiring
3829 all shift values to be in the range 0 .. 2^word_size-1.
3831 Therefore the shift amount is masked with 63 for 64-bit shifts
3832 and 31 for all others.
3834 /* shift_amt = shift_expr & MASK, regardless of operation size */
3835 assign( shift_amt
, binop(Iop_And8
, shift_expr
, mkU8(mask
)) );
3837 /* suitably widen the value to be shifted to 64 bits. */
3838 assign( pre64
, op64
==Iop_Sar64
? widenSto64(mkexpr(dst0
))
3839 : widenUto64(mkexpr(dst0
)) );
3841 /* res64 = pre64 `shift` shift_amt */
3842 assign( res64
, binop(op64
, mkexpr(pre64
), mkexpr(shift_amt
)) );
3844 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
3850 mkexpr(shift_amt
), mkU8(1)),
3853 /* Build the flags thunk. */
3854 setFlags_DEP1_DEP2_shift(op64
, res64
, res64ss
, ty
, shift_amt
);
3856 /* Narrow the result back down. */
3857 assign( dst1
, narrowTo(ty
, mkexpr(res64
)) );
3859 } /* if (isShift) */
3863 Int ccOp
= ty
==Ity_I8
? 0 : (ty
==Ity_I16
? 1
3864 : (ty
==Ity_I32
? 2 : 3));
3865 Bool left
= toBool(gregLO3ofRM(modrm
) == 0);
3866 IRTemp rot_amt
= newTemp(Ity_I8
);
3867 IRTemp rot_amt64
= newTemp(Ity_I8
);
3868 IRTemp oldFlags
= newTemp(Ity_I64
);
3869 UChar mask
= toUChar(sz
==8 ? 63 : 31);
3871 /* rot_amt = shift_expr & mask */
3872 /* By masking the rotate amount thusly, the IR-level Shl/Shr
3873 expressions never shift beyond the word size and thus remain
3875 assign(rot_amt64
, binop(Iop_And8
, shift_expr
, mkU8(mask
)));
3878 assign(rot_amt
, mkexpr(rot_amt64
));
3880 assign(rot_amt
, binop(Iop_And8
, mkexpr(rot_amt64
), mkU8(8*sz
-1)));
3884 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
3886 binop( mkSizedOp(ty
,Iop_Or8
),
3887 binop( mkSizedOp(ty
,Iop_Shl8
),
3891 binop( mkSizedOp(ty
,Iop_Shr8
),
3893 binop(Iop_Sub8
,mkU8(8*sz
), mkexpr(rot_amt
))
3897 ccOp
+= AMD64G_CC_OP_ROLB
;
3899 } else { /* right */
3901 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
3903 binop( mkSizedOp(ty
,Iop_Or8
),
3904 binop( mkSizedOp(ty
,Iop_Shr8
),
3908 binop( mkSizedOp(ty
,Iop_Shl8
),
3910 binop(Iop_Sub8
,mkU8(8*sz
), mkexpr(rot_amt
))
3914 ccOp
+= AMD64G_CC_OP_RORB
;
3918 /* dst1 now holds the rotated value. Build flag thunk. We
3919 need the resulting value for this, and the previous flags.
3920 Except don't set it if the rotate count is zero. */
3922 assign(oldFlags
, mk_amd64g_calculate_rflags_all());
3924 /* rot_amt64 :: Ity_I8. We need to convert it to I1. */
3925 IRTemp rot_amt64b
= newTemp(Ity_I1
);
3926 assign(rot_amt64b
, binop(Iop_CmpNE8
, mkexpr(rot_amt64
), mkU8(0)) );
3928 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
3929 stmt( IRStmt_Put( OFFB_CC_OP
,
3930 IRExpr_ITE( mkexpr(rot_amt64b
),
3932 IRExpr_Get(OFFB_CC_OP
,Ity_I64
) ) ));
3933 stmt( IRStmt_Put( OFFB_CC_DEP1
,
3934 IRExpr_ITE( mkexpr(rot_amt64b
),
3935 widenUto64(mkexpr(dst1
)),
3936 IRExpr_Get(OFFB_CC_DEP1
,Ity_I64
) ) ));
3937 stmt( IRStmt_Put( OFFB_CC_DEP2
,
3938 IRExpr_ITE( mkexpr(rot_amt64b
),
3940 IRExpr_Get(OFFB_CC_DEP2
,Ity_I64
) ) ));
3941 stmt( IRStmt_Put( OFFB_CC_NDEP
,
3942 IRExpr_ITE( mkexpr(rot_amt64b
),
3944 IRExpr_Get(OFFB_CC_NDEP
,Ity_I64
) ) ));
3945 } /* if (isRotate) */
3947 /* Save result, and finish up. */
3948 if (epartIsReg(modrm
)) {
3949 putIRegE(sz
, pfx
, modrm
, mkexpr(dst1
));
3950 if (vex_traceflags
& VEX_TRACE_FE
) {
3952 nameGrp2(gregLO3ofRM(modrm
)), nameISize(sz
) );
3954 vex_printf("%s", shift_expr_txt
);
3956 ppIRExpr(shift_expr
);
3957 vex_printf(", %s\n", nameIRegE(sz
,pfx
,modrm
));
3960 storeLE(mkexpr(addr
), mkexpr(dst1
));
3961 if (vex_traceflags
& VEX_TRACE_FE
) {
3963 nameGrp2(gregLO3ofRM(modrm
)), nameISize(sz
) );
3965 vex_printf("%s", shift_expr_txt
);
3967 ppIRExpr(shift_expr
);
3968 vex_printf(", %s\n", dis_buf
);
3975 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
3977 ULong
dis_Grp8_Imm ( const VexAbiInfo
* vbi
,
3979 Long delta
, UChar modrm
,
3980 Int am_sz
, Int sz
, ULong src_val
,
3983 /* src_val denotes a d8.
3984 And delta on entry points at the modrm byte. */
3986 IRType ty
= szToITy(sz
);
3987 IRTemp t2
= newTemp(Ity_I64
);
3988 IRTemp t2m
= newTemp(Ity_I64
);
3989 IRTemp t_addr
= IRTemp_INVALID
;
3993 /* we're optimists :-) */
3996 /* Check whether F2 or F3 are acceptable. */
3997 if (epartIsReg(modrm
)) {
3998 /* F2 or F3 are not allowed in the register case. */
3999 if (haveF2orF3(pfx
)) {
4004 /* F2 or F3 (but not both) are allowable provided LOCK is also
4006 if (haveF2orF3(pfx
)) {
4007 if (haveF2andF3(pfx
) || !haveLOCK(pfx
)) {
4014 /* Limit src_val -- the bit offset -- to something within a word.
4015 The Intel docs say that literal offsets larger than a word are
4016 masked in this way. */
4018 case 2: src_val
&= 15; break;
4019 case 4: src_val
&= 31; break;
4020 case 8: src_val
&= 63; break;
4021 default: *decode_OK
= False
; return delta
;
4024 /* Invent a mask suitable for the operation. */
4025 switch (gregLO3ofRM(modrm
)) {
4026 case 4: /* BT */ mask
= 0; break;
4027 case 5: /* BTS */ mask
= 1ULL << src_val
; break;
4028 case 6: /* BTR */ mask
= ~(1ULL << src_val
); break;
4029 case 7: /* BTC */ mask
= 1ULL << src_val
; break;
4030 /* If this needs to be extended, probably simplest to make a
4031 new function to handle the other cases (0 .. 3). The
4032 Intel docs do however not indicate any use for 0 .. 3, so
4033 we don't expect this to happen. */
4034 default: *decode_OK
= False
; return delta
;
4037 /* Fetch the value to be tested and modified into t2, which is
4038 64-bits wide regardless of sz. */
4039 if (epartIsReg(modrm
)) {
4040 vassert(am_sz
== 1);
4041 assign( t2
, widenUto64(getIRegE(sz
, pfx
, modrm
)) );
4042 delta
+= (am_sz
+ 1);
4043 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm
)),
4045 src_val
, nameIRegE(sz
,pfx
,modrm
));
4048 t_addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 1 );
4050 assign( t2
, widenUto64(loadLE(ty
, mkexpr(t_addr
))) );
4051 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm
)),
4056 /* Compute the new value into t2m, if non-BT. */
4057 switch (gregLO3ofRM(modrm
)) {
4061 assign( t2m
, binop(Iop_Or64
, mkU64(mask
), mkexpr(t2
)) );
4064 assign( t2m
, binop(Iop_And64
, mkU64(mask
), mkexpr(t2
)) );
4067 assign( t2m
, binop(Iop_Xor64
, mkU64(mask
), mkexpr(t2
)) );
4070 /*NOTREACHED*/ /*the previous switch guards this*/
4074 /* Write the result back, if non-BT. */
4075 if (gregLO3ofRM(modrm
) != 4 /* BT */) {
4076 if (epartIsReg(modrm
)) {
4077 putIRegE(sz
, pfx
, modrm
, narrowTo(ty
, mkexpr(t2m
)));
4079 if (haveLOCK(pfx
)) {
4080 casLE( mkexpr(t_addr
),
4081 narrowTo(ty
, mkexpr(t2
))/*expd*/,
4082 narrowTo(ty
, mkexpr(t2m
))/*new*/,
4083 guest_RIP_curr_instr
);
4085 storeLE(mkexpr(t_addr
), narrowTo(ty
, mkexpr(t2m
)));
4090 /* Copy relevant bit from t2 into the carry flag. */
4091 /* Flags: C=selected bit, O,S,A,P undefined, Z unchanged */
4092 /* so let's also keep O,S,A,P unchanged */
4093 const ULong maskC
= AMD64G_CC_MASK_C
;
4094 const ULong maskOSZAP
= AMD64G_CC_MASK_O
| AMD64G_CC_MASK_S
4095 | AMD64G_CC_MASK_Z
| AMD64G_CC_MASK_A
4098 IRTemp old_rflags
= newTemp(Ity_I64
);
4099 assign(old_rflags
, mk_amd64g_calculate_rflags_all());
4101 IRTemp new_rflags
= newTemp(Ity_I64
);
4104 binop(Iop_And64
, mkexpr(old_rflags
), mkU64(maskOSZAP
)),
4106 binop(Iop_Shr64
, mkexpr(t2
), mkU8(src_val
)),
4109 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
4110 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
4111 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(new_rflags
) ));
4112 /* Set NDEP even though it isn't used. This makes redundant-PUT
4113 elimination of previous stores to this field work better. */
4114 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
4120 /* Signed/unsigned widening multiply. Generate IR to multiply the
4121 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
4122 RDX:RAX/EDX:EAX/DX:AX/AX.
4124 static void codegen_mulL_A_D ( Int sz
, Bool syned
,
4125 IRTemp tmp
, const HChar
* tmp_txt
)
4127 IRType ty
= szToITy(sz
);
4128 IRTemp t1
= newTemp(ty
);
4130 assign( t1
, getIRegRAX(sz
) );
4134 IRTemp res128
= newTemp(Ity_I128
);
4135 IRTemp resHi
= newTemp(Ity_I64
);
4136 IRTemp resLo
= newTemp(Ity_I64
);
4137 IROp mulOp
= syned
? Iop_MullS64
: Iop_MullU64
;
4138 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4139 setFlags_MUL ( Ity_I64
, t1
, tmp
, tBaseOp
);
4140 assign( res128
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4141 assign( resHi
, unop(Iop_128HIto64
,mkexpr(res128
)));
4142 assign( resLo
, unop(Iop_128to64
,mkexpr(res128
)));
4143 putIReg64(R_RDX
, mkexpr(resHi
));
4144 putIReg64(R_RAX
, mkexpr(resLo
));
4148 IRTemp res64
= newTemp(Ity_I64
);
4149 IRTemp resHi
= newTemp(Ity_I32
);
4150 IRTemp resLo
= newTemp(Ity_I32
);
4151 IROp mulOp
= syned
? Iop_MullS32
: Iop_MullU32
;
4152 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4153 setFlags_MUL ( Ity_I32
, t1
, tmp
, tBaseOp
);
4154 assign( res64
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4155 assign( resHi
, unop(Iop_64HIto32
,mkexpr(res64
)));
4156 assign( resLo
, unop(Iop_64to32
,mkexpr(res64
)));
4157 putIRegRDX(4, mkexpr(resHi
));
4158 putIRegRAX(4, mkexpr(resLo
));
4162 IRTemp res32
= newTemp(Ity_I32
);
4163 IRTemp resHi
= newTemp(Ity_I16
);
4164 IRTemp resLo
= newTemp(Ity_I16
);
4165 IROp mulOp
= syned
? Iop_MullS16
: Iop_MullU16
;
4166 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4167 setFlags_MUL ( Ity_I16
, t1
, tmp
, tBaseOp
);
4168 assign( res32
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4169 assign( resHi
, unop(Iop_32HIto16
,mkexpr(res32
)));
4170 assign( resLo
, unop(Iop_32to16
,mkexpr(res32
)));
4171 putIRegRDX(2, mkexpr(resHi
));
4172 putIRegRAX(2, mkexpr(resLo
));
4176 IRTemp res16
= newTemp(Ity_I16
);
4177 IRTemp resHi
= newTemp(Ity_I8
);
4178 IRTemp resLo
= newTemp(Ity_I8
);
4179 IROp mulOp
= syned
? Iop_MullS8
: Iop_MullU8
;
4180 UInt tBaseOp
= syned
? AMD64G_CC_OP_SMULB
: AMD64G_CC_OP_UMULB
;
4181 setFlags_MUL ( Ity_I8
, t1
, tmp
, tBaseOp
);
4182 assign( res16
, binop(mulOp
, mkexpr(t1
), mkexpr(tmp
)) );
4183 assign( resHi
, unop(Iop_16HIto8
,mkexpr(res16
)));
4184 assign( resLo
, unop(Iop_16to8
,mkexpr(res16
)));
4185 putIRegRAX(2, mkexpr(res16
));
4190 vpanic("codegen_mulL_A_D(amd64)");
4192 DIP("%s%c %s\n", syned
? "imul" : "mul", nameISize(sz
), tmp_txt
);
4196 /* Group 3 extended opcodes. We have to decide here whether F2 and F3
4199 ULong
dis_Grp3 ( const VexAbiInfo
* vbi
,
4200 Prefix pfx
, Int sz
, Long delta
, Bool
* decode_OK
)
4207 IRType ty
= szToITy(sz
);
4208 IRTemp t1
= newTemp(ty
);
4209 IRTemp dst1
, src
, dst0
;
4211 modrm
= getUChar(delta
);
4212 if (epartIsReg(modrm
)) {
4213 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4214 if (haveF2orF3(pfx
)) goto unhandled
;
4215 switch (gregLO3ofRM(modrm
)) {
4216 case 0: { /* TEST */
4218 d64
= getSDisp(imin(4,sz
), delta
);
4219 delta
+= imin(4,sz
);
4221 assign(dst1
, binop(mkSizedOp(ty
,Iop_And8
),
4222 getIRegE(sz
,pfx
,modrm
),
4223 mkU(ty
, d64
& mkSizeMask(sz
))));
4224 setFlags_DEP1( Iop_And8
, dst1
, ty
);
4225 DIP("test%c $%lld, %s\n",
4227 nameIRegE(sz
, pfx
, modrm
));
4235 putIRegE(sz
, pfx
, modrm
,
4236 unop(mkSizedOp(ty
,Iop_Not8
),
4237 getIRegE(sz
, pfx
, modrm
)));
4238 DIP("not%c %s\n", nameISize(sz
),
4239 nameIRegE(sz
, pfx
, modrm
));
4246 assign(dst0
, mkU(ty
,0));
4247 assign(src
, getIRegE(sz
, pfx
, modrm
));
4248 assign(dst1
, binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(dst0
),
4250 setFlags_DEP1_DEP2(Iop_Sub8
, dst0
, src
, ty
);
4251 putIRegE(sz
, pfx
, modrm
, mkexpr(dst1
));
4252 DIP("neg%c %s\n", nameISize(sz
), nameIRegE(sz
, pfx
, modrm
));
4254 case 4: /* MUL (unsigned widening) */
4257 assign(src
, getIRegE(sz
,pfx
,modrm
));
4258 codegen_mulL_A_D ( sz
, False
, src
,
4259 nameIRegE(sz
,pfx
,modrm
) );
4261 case 5: /* IMUL (signed widening) */
4264 assign(src
, getIRegE(sz
,pfx
,modrm
));
4265 codegen_mulL_A_D ( sz
, True
, src
,
4266 nameIRegE(sz
,pfx
,modrm
) );
4270 assign( t1
, getIRegE(sz
, pfx
, modrm
) );
4271 codegen_div ( sz
, t1
, False
);
4272 DIP("div%c %s\n", nameISize(sz
),
4273 nameIRegE(sz
, pfx
, modrm
));
4277 assign( t1
, getIRegE(sz
, pfx
, modrm
) );
4278 codegen_div ( sz
, t1
, True
);
4279 DIP("idiv%c %s\n", nameISize(sz
),
4280 nameIRegE(sz
, pfx
, modrm
));
4284 vpanic("Grp3(amd64,R)");
4287 /* Decide if F2/XACQ or F3/XREL might be valid. */
4288 Bool validF2orF3
= haveF2orF3(pfx
) ? False
: True
;
4289 if ((gregLO3ofRM(modrm
) == 3/*NEG*/ || gregLO3ofRM(modrm
) == 2/*NOT*/)
4290 && haveF2orF3(pfx
) && !haveF2andF3(pfx
) && haveLOCK(pfx
)) {
4293 if (!validF2orF3
) goto unhandled
;
4295 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
,
4296 /* we have to inform disAMode of any immediate
4298 gregLO3ofRM(modrm
)==0/*TEST*/
4304 assign(t1
, loadLE(ty
,mkexpr(addr
)));
4305 switch (gregLO3ofRM(modrm
)) {
4306 case 0: { /* TEST */
4307 d64
= getSDisp(imin(4,sz
), delta
);
4308 delta
+= imin(4,sz
);
4310 assign(dst1
, binop(mkSizedOp(ty
,Iop_And8
),
4312 mkU(ty
, d64
& mkSizeMask(sz
))));
4313 setFlags_DEP1( Iop_And8
, dst1
, ty
);
4314 DIP("test%c $%lld, %s\n", nameISize(sz
), d64
, dis_buf
);
4322 assign(dst1
, unop(mkSizedOp(ty
,Iop_Not8
), mkexpr(t1
)));
4323 if (haveLOCK(pfx
)) {
4324 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(dst1
)/*new*/,
4325 guest_RIP_curr_instr
);
4327 storeLE( mkexpr(addr
), mkexpr(dst1
) );
4329 DIP("not%c %s\n", nameISize(sz
), dis_buf
);
4335 assign(dst0
, mkU(ty
,0));
4336 assign(src
, mkexpr(t1
));
4337 assign(dst1
, binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(dst0
),
4339 if (haveLOCK(pfx
)) {
4340 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(dst1
)/*new*/,
4341 guest_RIP_curr_instr
);
4343 storeLE( mkexpr(addr
), mkexpr(dst1
) );
4345 setFlags_DEP1_DEP2(Iop_Sub8
, dst0
, src
, ty
);
4346 DIP("neg%c %s\n", nameISize(sz
), dis_buf
);
4348 case 4: /* MUL (unsigned widening) */
4349 codegen_mulL_A_D ( sz
, False
, t1
, dis_buf
);
4352 codegen_mulL_A_D ( sz
, True
, t1
, dis_buf
);
4355 codegen_div ( sz
, t1
, False
);
4356 DIP("div%c %s\n", nameISize(sz
), dis_buf
);
4359 codegen_div ( sz
, t1
, True
);
4360 DIP("idiv%c %s\n", nameISize(sz
), dis_buf
);
4364 vpanic("Grp3(amd64,M)");
4374 /* Group 4 extended opcodes. We have to decide here whether F2 and F3
4377 ULong
dis_Grp4 ( const VexAbiInfo
* vbi
,
4378 Prefix pfx
, Long delta
, Bool
* decode_OK
)
4384 IRTemp t1
= newTemp(ty
);
4385 IRTemp t2
= newTemp(ty
);
4389 modrm
= getUChar(delta
);
4390 if (epartIsReg(modrm
)) {
4391 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4392 if (haveF2orF3(pfx
)) goto unhandled
;
4393 assign(t1
, getIRegE(1, pfx
, modrm
));
4394 switch (gregLO3ofRM(modrm
)) {
4396 assign(t2
, binop(Iop_Add8
, mkexpr(t1
), mkU8(1)));
4397 putIRegE(1, pfx
, modrm
, mkexpr(t2
));
4398 setFlags_INC_DEC( True
, t2
, ty
);
4401 assign(t2
, binop(Iop_Sub8
, mkexpr(t1
), mkU8(1)));
4402 putIRegE(1, pfx
, modrm
, mkexpr(t2
));
4403 setFlags_INC_DEC( False
, t2
, ty
);
4410 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm
)),
4411 nameIRegE(1, pfx
, modrm
));
4413 /* Decide if F2/XACQ or F3/XREL might be valid. */
4414 Bool validF2orF3
= haveF2orF3(pfx
) ? False
: True
;
4415 if ((gregLO3ofRM(modrm
) == 0/*INC*/ || gregLO3ofRM(modrm
) == 1/*DEC*/)
4416 && haveF2orF3(pfx
) && !haveF2andF3(pfx
) && haveLOCK(pfx
)) {
4419 if (!validF2orF3
) goto unhandled
;
4421 IRTemp addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
4422 assign( t1
, loadLE(ty
, mkexpr(addr
)) );
4423 switch (gregLO3ofRM(modrm
)) {
4425 assign(t2
, binop(Iop_Add8
, mkexpr(t1
), mkU8(1)));
4426 if (haveLOCK(pfx
)) {
4427 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(t2
)/*new*/,
4428 guest_RIP_curr_instr
);
4430 storeLE( mkexpr(addr
), mkexpr(t2
) );
4432 setFlags_INC_DEC( True
, t2
, ty
);
4435 assign(t2
, binop(Iop_Sub8
, mkexpr(t1
), mkU8(1)));
4436 if (haveLOCK(pfx
)) {
4437 casLE( mkexpr(addr
), mkexpr(t1
)/*expd*/, mkexpr(t2
)/*new*/,
4438 guest_RIP_curr_instr
);
4440 storeLE( mkexpr(addr
), mkexpr(t2
) );
4442 setFlags_INC_DEC( False
, t2
, ty
);
4449 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm
)), dis_buf
);
4458 /* Group 5 extended opcodes. We have to decide here whether F2 and F3
4461 ULong
dis_Grp5 ( const VexAbiInfo
* vbi
,
4462 Prefix pfx
, Int sz
, Long delta
,
4463 /*MOD*/DisResult
* dres
, /*OUT*/Bool
* decode_OK
)
4468 IRTemp addr
= IRTemp_INVALID
;
4469 IRType ty
= szToITy(sz
);
4470 IRTemp t1
= newTemp(ty
);
4471 IRTemp t2
= IRTemp_INVALID
;
4472 IRTemp t3
= IRTemp_INVALID
;
4477 modrm
= getUChar(delta
);
4478 if (epartIsReg(modrm
)) {
4479 /* F2/XACQ and F3/XREL are always invalid in the non-mem case.
4480 F2/CALL and F2/JMP may have bnd prefix. */
4483 && (gregLO3ofRM(modrm
) == 2 || gregLO3ofRM(modrm
) == 4)))
4485 assign(t1
, getIRegE(sz
,pfx
,modrm
));
4486 switch (gregLO3ofRM(modrm
)) {
4489 assign(t2
, binop(mkSizedOp(ty
,Iop_Add8
),
4490 mkexpr(t1
), mkU(ty
,1)));
4491 setFlags_INC_DEC( True
, t2
, ty
);
4492 putIRegE(sz
,pfx
,modrm
, mkexpr(t2
));
4496 assign(t2
, binop(mkSizedOp(ty
,Iop_Sub8
),
4497 mkexpr(t1
), mkU(ty
,1)));
4498 setFlags_INC_DEC( False
, t2
, ty
);
4499 putIRegE(sz
,pfx
,modrm
, mkexpr(t2
));
4501 case 2: /* call Ev */
4502 /* Ignore any sz value and operate as if sz==8. */
4503 if (!(sz
== 4 || sz
== 8)) goto unhandledR
;
4504 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4506 t3
= newTemp(Ity_I64
);
4507 assign(t3
, getIRegE(sz
,pfx
,modrm
));
4508 t2
= newTemp(Ity_I64
);
4509 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
4510 putIReg64(R_RSP
, mkexpr(t2
));
4511 storeLE( mkexpr(t2
), mkU64(guest_RIP_bbstart
+delta
+1));
4512 make_redzone_AbiHint(vbi
, t2
, t3
/*nia*/, "call-Ev(reg)");
4513 jmp_treg(dres
, Ijk_Call
, t3
);
4514 vassert(dres
->whatNext
== Dis_StopHere
);
4517 case 4: /* jmp Ev */
4518 /* Ignore any sz value and operate as if sz==8. */
4519 if (!(sz
== 4 || sz
== 8)) goto unhandledR
;
4520 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4522 t3
= newTemp(Ity_I64
);
4523 assign(t3
, getIRegE(sz
,pfx
,modrm
));
4524 jmp_treg(dres
, Ijk_Boring
, t3
);
4525 vassert(dres
->whatNext
== Dis_StopHere
);
4528 case 6: /* PUSH Ev */
4529 /* There is no encoding for 32-bit operand size; hence ... */
4530 if (sz
== 4) sz
= 8;
4531 if (sz
== 8 || sz
== 2) {
4532 ty
= szToITy(sz
); /* redo it, since sz might have changed */
4534 assign(t3
, getIRegE(sz
,pfx
,modrm
));
4535 t2
= newTemp(Ity_I64
);
4536 assign( t2
, binop(Iop_Sub64
,getIReg64(R_RSP
),mkU64(sz
)) );
4537 putIReg64(R_RSP
, mkexpr(t2
) );
4538 storeLE( mkexpr(t2
), mkexpr(t3
) );
4541 goto unhandledR
; /* awaiting test case */
4549 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm
)),
4550 showSz
? nameISize(sz
) : ' ',
4551 nameIRegE(sz
, pfx
, modrm
));
4553 /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */
4554 Bool validF2orF3
= haveF2orF3(pfx
) ? False
: True
;
4555 if ((gregLO3ofRM(modrm
) == 0/*INC*/ || gregLO3ofRM(modrm
) == 1/*DEC*/)
4556 && haveF2orF3(pfx
) && !haveF2andF3(pfx
) && haveLOCK(pfx
)) {
4558 } else if ((gregLO3ofRM(modrm
) == 2 || gregLO3ofRM(modrm
) == 4)
4559 && (haveF2(pfx
) && !haveF3(pfx
))) {
4562 if (!validF2orF3
) goto unhandledM
;
4564 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
4565 if (gregLO3ofRM(modrm
) != 2 && gregLO3ofRM(modrm
) != 4
4566 && gregLO3ofRM(modrm
) != 6) {
4567 assign(t1
, loadLE(ty
,mkexpr(addr
)));
4569 switch (gregLO3ofRM(modrm
)) {
4572 assign(t2
, binop(mkSizedOp(ty
,Iop_Add8
),
4573 mkexpr(t1
), mkU(ty
,1)));
4574 if (haveLOCK(pfx
)) {
4575 casLE( mkexpr(addr
),
4576 mkexpr(t1
), mkexpr(t2
), guest_RIP_curr_instr
);
4578 storeLE(mkexpr(addr
),mkexpr(t2
));
4580 setFlags_INC_DEC( True
, t2
, ty
);
4584 assign(t2
, binop(mkSizedOp(ty
,Iop_Sub8
),
4585 mkexpr(t1
), mkU(ty
,1)));
4586 if (haveLOCK(pfx
)) {
4587 casLE( mkexpr(addr
),
4588 mkexpr(t1
), mkexpr(t2
), guest_RIP_curr_instr
);
4590 storeLE(mkexpr(addr
),mkexpr(t2
));
4592 setFlags_INC_DEC( False
, t2
, ty
);
4594 case 2: /* call Ev */
4595 /* Ignore any sz value and operate as if sz==8. */
4596 if (!(sz
== 4 || sz
== 8)) goto unhandledM
;
4597 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4599 t3
= newTemp(Ity_I64
);
4600 assign(t3
, loadLE(Ity_I64
,mkexpr(addr
)));
4601 t2
= newTemp(Ity_I64
);
4602 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
4603 putIReg64(R_RSP
, mkexpr(t2
));
4604 storeLE( mkexpr(t2
), mkU64(guest_RIP_bbstart
+delta
+len
));
4605 make_redzone_AbiHint(vbi
, t2
, t3
/*nia*/, "call-Ev(mem)");
4606 jmp_treg(dres
, Ijk_Call
, t3
);
4607 vassert(dres
->whatNext
== Dis_StopHere
);
4610 case 4: /* JMP Ev */
4611 /* Ignore any sz value and operate as if sz==8. */
4612 if (!(sz
== 4 || sz
== 8)) goto unhandledM
;
4613 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
4615 t3
= newTemp(Ity_I64
);
4616 assign(t3
, loadLE(Ity_I64
,mkexpr(addr
)));
4617 jmp_treg(dres
, Ijk_Boring
, t3
);
4618 vassert(dres
->whatNext
== Dis_StopHere
);
4621 case 6: /* PUSH Ev */
4622 /* There is no encoding for 32-bit operand size; hence ... */
4623 if (sz
== 4) sz
= 8;
4624 if (sz
== 8 || sz
== 2) {
4625 ty
= szToITy(sz
); /* redo it, since sz might have changed */
4627 assign(t3
, loadLE(ty
,mkexpr(addr
)));
4628 t2
= newTemp(Ity_I64
);
4629 assign( t2
, binop(Iop_Sub64
,getIReg64(R_RSP
),mkU64(sz
)) );
4630 putIReg64(R_RSP
, mkexpr(t2
) );
4631 storeLE( mkexpr(t2
), mkexpr(t3
) );
4634 goto unhandledM
; /* awaiting test case */
4642 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm
)),
4643 showSz
? nameISize(sz
) : ' ',
4650 /*------------------------------------------------------------*/
4651 /*--- Disassembling string ops (including REP prefixes) ---*/
4652 /*------------------------------------------------------------*/
4654 /* Code shared by all the string ops */
4656 void dis_string_op_increment ( Int sz
, IRTemp t_inc
)
4659 if (sz
== 8 || sz
== 4 || sz
== 2) {
4661 if (sz
== 4) logSz
= 2;
4662 if (sz
== 8) logSz
= 3;
4664 binop(Iop_Shl64
, IRExpr_Get( OFFB_DFLAG
, Ity_I64
),
4668 IRExpr_Get( OFFB_DFLAG
, Ity_I64
) );
4673 void dis_string_op( void (*dis_OP
)( Int
, IRTemp
, Prefix pfx
),
4674 Int sz
, const HChar
* name
, Prefix pfx
)
4676 IRTemp t_inc
= newTemp(Ity_I64
);
4677 /* Really we ought to inspect the override prefixes, but we don't.
4678 The following assertion catches any resulting sillyness. */
4679 vassert(pfx
== clearSegBits(pfx
));
4680 dis_string_op_increment(sz
, t_inc
);
4681 dis_OP( sz
, t_inc
, pfx
);
4682 DIP("%s%c\n", name
, nameISize(sz
));
4686 void dis_MOVS ( Int sz
, IRTemp t_inc
, Prefix pfx
)
4688 IRType ty
= szToITy(sz
);
4689 IRTemp td
= newTemp(Ity_I64
); /* RDI */
4690 IRTemp ts
= newTemp(Ity_I64
); /* RSI */
4691 IRExpr
*incd
, *incs
;
4694 assign( td
, unop(Iop_32Uto64
, getIReg32(R_RDI
)) );
4695 assign( ts
, unop(Iop_32Uto64
, getIReg32(R_RSI
)) );
4697 assign( td
, getIReg64(R_RDI
) );
4698 assign( ts
, getIReg64(R_RSI
) );
4701 storeLE( mkexpr(td
), loadLE(ty
,mkexpr(ts
)) );
4703 incd
= binop(Iop_Add64
, mkexpr(td
), mkexpr(t_inc
));
4704 incs
= binop(Iop_Add64
, mkexpr(ts
), mkexpr(t_inc
));
4706 incd
= unop(Iop_32Uto64
, unop(Iop_64to32
, incd
));
4707 incs
= unop(Iop_32Uto64
, unop(Iop_64to32
, incs
));
4709 putIReg64( R_RDI
, incd
);
4710 putIReg64( R_RSI
, incs
);
4714 void dis_LODS ( Int sz
, IRTemp t_inc
, Prefix pfx
)
4716 IRType ty
= szToITy(sz
);
4717 IRTemp ts
= newTemp(Ity_I64
); /* RSI */
4721 assign( ts
, unop(Iop_32Uto64
, getIReg32(R_RSI
)) );
4723 assign( ts
, getIReg64(R_RSI
) );
4725 putIRegRAX ( sz
, loadLE(ty
, mkexpr(ts
)) );
4727 incs
= binop(Iop_Add64
, mkexpr(ts
), mkexpr(t_inc
));
4729 incs
= unop(Iop_32Uto64
, unop(Iop_64to32
, incs
));
4730 putIReg64( R_RSI
, incs
);
4734 void dis_STOS ( Int sz
, IRTemp t_inc
, Prefix pfx
)
4736 IRType ty
= szToITy(sz
);
4737 IRTemp ta
= newTemp(ty
); /* rAX */
4738 IRTemp td
= newTemp(Ity_I64
); /* RDI */
4741 assign( ta
, getIRegRAX(sz
) );
4744 assign( td
, unop(Iop_32Uto64
, getIReg32(R_RDI
)) );
4746 assign( td
, getIReg64(R_RDI
) );
4748 storeLE( mkexpr(td
), mkexpr(ta
) );
4750 incd
= binop(Iop_Add64
, mkexpr(td
), mkexpr(t_inc
));
4752 incd
= unop(Iop_32Uto64
, unop(Iop_64to32
, incd
));
4753 putIReg64( R_RDI
, incd
);
4757 void dis_CMPS ( Int sz
, IRTemp t_inc
, Prefix pfx
)
4759 IRType ty
= szToITy(sz
);
4760 IRTemp tdv
= newTemp(ty
); /* (RDI) */
4761 IRTemp tsv
= newTemp(ty
); /* (RSI) */
4762 IRTemp td
= newTemp(Ity_I64
); /* RDI */
4763 IRTemp ts
= newTemp(Ity_I64
); /* RSI */
4764 IRExpr
*incd
, *incs
;
4767 assign( td
, unop(Iop_32Uto64
, getIReg32(R_RDI
)) );
4768 assign( ts
, unop(Iop_32Uto64
, getIReg32(R_RSI
)) );
4770 assign( td
, getIReg64(R_RDI
) );
4771 assign( ts
, getIReg64(R_RSI
) );
4774 assign( tdv
, loadLE(ty
,mkexpr(td
)) );
4776 assign( tsv
, loadLE(ty
,mkexpr(ts
)) );
4778 setFlags_DEP1_DEP2 ( Iop_Sub8
, tsv
, tdv
, ty
);
4780 incd
= binop(Iop_Add64
, mkexpr(td
), mkexpr(t_inc
));
4781 incs
= binop(Iop_Add64
, mkexpr(ts
), mkexpr(t_inc
));
4783 incd
= unop(Iop_32Uto64
, unop(Iop_64to32
, incd
));
4784 incs
= unop(Iop_32Uto64
, unop(Iop_64to32
, incs
));
4786 putIReg64( R_RDI
, incd
);
4787 putIReg64( R_RSI
, incs
);
4791 void dis_SCAS ( Int sz
, IRTemp t_inc
, Prefix pfx
)
4793 IRType ty
= szToITy(sz
);
4794 IRTemp ta
= newTemp(ty
); /* rAX */
4795 IRTemp td
= newTemp(Ity_I64
); /* RDI */
4796 IRTemp tdv
= newTemp(ty
); /* (RDI) */
4799 assign( ta
, getIRegRAX(sz
) );
4802 assign( td
, unop(Iop_32Uto64
, getIReg32(R_RDI
)) );
4804 assign( td
, getIReg64(R_RDI
) );
4806 assign( tdv
, loadLE(ty
,mkexpr(td
)) );
4808 setFlags_DEP1_DEP2 ( Iop_Sub8
, ta
, tdv
, ty
);
4810 incd
= binop(Iop_Add64
, mkexpr(td
), mkexpr(t_inc
));
4812 incd
= unop(Iop_32Uto64
, unop(Iop_64to32
, incd
));
4813 putIReg64( R_RDI
, incd
);
4817 /* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume
4818 the insn is the last one in the basic block, and so emit a jump to
4819 the next insn, rather than just falling through. */
4821 void dis_REP_op ( /*MOD*/DisResult
* dres
,
4823 void (*dis_OP
)(Int
, IRTemp
, Prefix
),
4824 Int sz
, Addr64 rip
, Addr64 rip_next
, const HChar
* name
,
4827 IRTemp t_inc
= newTemp(Ity_I64
);
4831 /* Really we ought to inspect the override prefixes, but we don't.
4832 The following assertion catches any resulting sillyness. */
4833 vassert(pfx
== clearSegBits(pfx
));
4836 tc
= newTemp(Ity_I32
); /* ECX */
4837 assign( tc
, getIReg32(R_RCX
) );
4838 cmp
= binop(Iop_CmpEQ32
, mkexpr(tc
), mkU32(0));
4840 tc
= newTemp(Ity_I64
); /* RCX */
4841 assign( tc
, getIReg64(R_RCX
) );
4842 cmp
= binop(Iop_CmpEQ64
, mkexpr(tc
), mkU64(0));
4845 stmt( IRStmt_Exit( cmp
, Ijk_Boring
,
4846 IRConst_U64(rip_next
), OFFB_RIP
) );
4849 putIReg32(R_RCX
, binop(Iop_Sub32
, mkexpr(tc
), mkU32(1)) );
4851 putIReg64(R_RCX
, binop(Iop_Sub64
, mkexpr(tc
), mkU64(1)) );
4853 dis_string_op_increment(sz
, t_inc
);
4854 dis_OP (sz
, t_inc
, pfx
);
4856 if (cond
== AMD64CondAlways
) {
4857 jmp_lit(dres
, Ijk_Boring
, rip
);
4858 vassert(dres
->whatNext
== Dis_StopHere
);
4860 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond
),
4864 jmp_lit(dres
, Ijk_Boring
, rip_next
);
4865 vassert(dres
->whatNext
== Dis_StopHere
);
4867 DIP("%s%c\n", name
, nameISize(sz
));
4871 /*------------------------------------------------------------*/
4872 /*--- Arithmetic, etc. ---*/
4873 /*------------------------------------------------------------*/
4875 /* IMUL E, G. Supplied eip points to the modR/M byte. */
4877 ULong
dis_mul_E_G ( const VexAbiInfo
* vbi
,
4884 UChar rm
= getUChar(delta0
);
4885 IRType ty
= szToITy(size
);
4886 IRTemp te
= newTemp(ty
);
4887 IRTemp tg
= newTemp(ty
);
4888 IRTemp resLo
= newTemp(ty
);
4890 assign( tg
, getIRegG(size
, pfx
, rm
) );
4891 if (epartIsReg(rm
)) {
4892 assign( te
, getIRegE(size
, pfx
, rm
) );
4894 IRTemp addr
= disAMode( &alen
, vbi
, pfx
, delta0
, dis_buf
, 0 );
4895 assign( te
, loadLE(ty
,mkexpr(addr
)) );
4898 setFlags_MUL ( ty
, te
, tg
, AMD64G_CC_OP_SMULB
);
4900 assign( resLo
, binop( mkSizedOp(ty
, Iop_Mul8
), mkexpr(te
), mkexpr(tg
) ) );
4902 putIRegG(size
, pfx
, rm
, mkexpr(resLo
) );
4904 if (epartIsReg(rm
)) {
4905 DIP("imul%c %s, %s\n", nameISize(size
),
4906 nameIRegE(size
,pfx
,rm
),
4907 nameIRegG(size
,pfx
,rm
));
4910 DIP("imul%c %s, %s\n", nameISize(size
),
4912 nameIRegG(size
,pfx
,rm
));
4918 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. */
4920 ULong
dis_imul_I_E_G ( const VexAbiInfo
* vbi
,
4929 UChar rm
= getUChar(delta
);
4930 IRType ty
= szToITy(size
);
4931 IRTemp te
= newTemp(ty
);
4932 IRTemp tl
= newTemp(ty
);
4933 IRTemp resLo
= newTemp(ty
);
4935 vassert(/*size == 1 ||*/ size
== 2 || size
== 4 || size
== 8);
4937 if (epartIsReg(rm
)) {
4938 assign(te
, getIRegE(size
, pfx
, rm
));
4941 IRTemp addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
4943 assign(te
, loadLE(ty
, mkexpr(addr
)));
4946 d64
= getSDisp(imin(4,litsize
),delta
);
4947 delta
+= imin(4,litsize
);
4949 d64
&= mkSizeMask(size
);
4950 assign(tl
, mkU(ty
,d64
));
4952 assign( resLo
, binop( mkSizedOp(ty
, Iop_Mul8
), mkexpr(te
), mkexpr(tl
) ));
4954 setFlags_MUL ( ty
, te
, tl
, AMD64G_CC_OP_SMULB
);
4956 putIRegG(size
, pfx
, rm
, mkexpr(resLo
));
4958 DIP("imul%c $%lld, %s, %s\n",
4959 nameISize(size
), d64
,
4960 ( epartIsReg(rm
) ? nameIRegE(size
,pfx
,rm
) : dis_buf
),
4961 nameIRegG(size
,pfx
,rm
) );
4966 /* Generate an IR sequence to do a popcount operation on the supplied
4967 IRTemp, and return a new IRTemp holding the result. 'ty' may be
4968 Ity_I16, Ity_I32 or Ity_I64 only. */
4969 static IRTemp
gen_POPCOUNT ( IRType ty
, IRTemp src
)
4972 if (ty
== Ity_I16
) {
4973 IRTemp old
= IRTemp_INVALID
;
4974 IRTemp nyu
= IRTemp_INVALID
;
4975 IRTemp mask
[4], shift
[4];
4976 for (i
= 0; i
< 4; i
++) {
4977 mask
[i
] = newTemp(ty
);
4980 assign(mask
[0], mkU16(0x5555));
4981 assign(mask
[1], mkU16(0x3333));
4982 assign(mask
[2], mkU16(0x0F0F));
4983 assign(mask
[3], mkU16(0x00FF));
4985 for (i
= 0; i
< 4; i
++) {
4993 binop(Iop_Shr16
, mkexpr(old
), mkU8(shift
[i
])),
4999 if (ty
== Ity_I32
) {
5000 IRTemp old
= IRTemp_INVALID
;
5001 IRTemp nyu
= IRTemp_INVALID
;
5002 IRTemp mask
[5], shift
[5];
5003 for (i
= 0; i
< 5; i
++) {
5004 mask
[i
] = newTemp(ty
);
5007 assign(mask
[0], mkU32(0x55555555));
5008 assign(mask
[1], mkU32(0x33333333));
5009 assign(mask
[2], mkU32(0x0F0F0F0F));
5010 assign(mask
[3], mkU32(0x00FF00FF));
5011 assign(mask
[4], mkU32(0x0000FFFF));
5013 for (i
= 0; i
< 5; i
++) {
5021 binop(Iop_Shr32
, mkexpr(old
), mkU8(shift
[i
])),
5027 if (ty
== Ity_I64
) {
5028 IRTemp old
= IRTemp_INVALID
;
5029 IRTemp nyu
= IRTemp_INVALID
;
5030 IRTemp mask
[6], shift
[6];
5031 for (i
= 0; i
< 6; i
++) {
5032 mask
[i
] = newTemp(ty
);
5035 assign(mask
[0], mkU64(0x5555555555555555ULL
));
5036 assign(mask
[1], mkU64(0x3333333333333333ULL
));
5037 assign(mask
[2], mkU64(0x0F0F0F0F0F0F0F0FULL
));
5038 assign(mask
[3], mkU64(0x00FF00FF00FF00FFULL
));
5039 assign(mask
[4], mkU64(0x0000FFFF0000FFFFULL
));
5040 assign(mask
[5], mkU64(0x00000000FFFFFFFFULL
));
5042 for (i
= 0; i
< 6; i
++) {
5050 binop(Iop_Shr64
, mkexpr(old
), mkU8(shift
[i
])),
5061 /* Generate an IR sequence to do a count-leading-zeroes operation on
5062 the supplied IRTemp, and return a new IRTemp holding the result.
5063 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
5064 the argument is zero, return the number of bits in the word (the
5065 natural semantics). */
5066 static IRTemp
gen_LZCNT ( IRType ty
, IRTemp src
)
5068 vassert(ty
== Ity_I64
|| ty
== Ity_I32
|| ty
== Ity_I16
);
5070 IRTemp src64
= newTemp(Ity_I64
);
5071 assign(src64
, widenUto64( mkexpr(src
) ));
5073 IRTemp src64x
= newTemp(Ity_I64
);
5075 binop(Iop_Shl64
, mkexpr(src64
),
5076 mkU8(64 - 8 * sizeofIRType(ty
))));
5078 // Clz64 has undefined semantics when its input is zero, so
5079 // special-case around that.
5080 IRTemp res64
= newTemp(Ity_I64
);
5083 binop(Iop_CmpEQ64
, mkexpr(src64x
), mkU64(0)),
5084 mkU64(8 * sizeofIRType(ty
)),
5085 unop(Iop_Clz64
, mkexpr(src64x
))
5088 IRTemp res
= newTemp(ty
);
5089 assign(res
, narrowTo(ty
, mkexpr(res64
)));
5094 /* Generate an IR sequence to do a count-trailing-zeroes operation on
5095 the supplied IRTemp, and return a new IRTemp holding the result.
5096 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
5097 the argument is zero, return the number of bits in the word (the
5098 natural semantics). */
5099 static IRTemp
gen_TZCNT ( IRType ty
, IRTemp src
)
5101 vassert(ty
== Ity_I64
|| ty
== Ity_I32
|| ty
== Ity_I16
);
5103 IRTemp src64
= newTemp(Ity_I64
);
5104 assign(src64
, widenUto64( mkexpr(src
) ));
5106 // Ctz64 has undefined semantics when its input is zero, so
5107 // special-case around that.
5108 IRTemp res64
= newTemp(Ity_I64
);
5111 binop(Iop_CmpEQ64
, mkexpr(src64
), mkU64(0)),
5112 mkU64(8 * sizeofIRType(ty
)),
5113 unop(Iop_Ctz64
, mkexpr(src64
))
5116 IRTemp res
= newTemp(ty
);
5117 assign(res
, narrowTo(ty
, mkexpr(res64
)));
5122 /*------------------------------------------------------------*/
5124 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
5126 /*------------------------------------------------------------*/
5128 /* --- Helper functions for dealing with the register stack. --- */
5130 /* --- Set the emulation-warning pseudo-register. --- */
5132 static void put_emwarn ( IRExpr
* e
/* :: Ity_I32 */ )
5134 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I32
);
5135 stmt( IRStmt_Put( OFFB_EMNOTE
, e
) );
5138 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
5140 static IRExpr
* mkQNaN64 ( void )
5142 /* QNaN is 0 2047 1 0(51times)
5143 == 0b 11111111111b 1 0(51times)
5144 == 0x7FF8 0000 0000 0000
5146 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL
));
5149 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
5151 static IRExpr
* get_ftop ( void )
5153 return IRExpr_Get( OFFB_FTOP
, Ity_I32
);
5156 static void put_ftop ( IRExpr
* e
)
5158 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I32
);
5159 stmt( IRStmt_Put( OFFB_FTOP
, e
) );
5162 /* --------- Get/put the C3210 bits. --------- */
5164 static IRExpr
* /* :: Ity_I64 */ get_C3210 ( void )
5166 return IRExpr_Get( OFFB_FC3210
, Ity_I64
);
5169 static void put_C3210 ( IRExpr
* e
/* :: Ity_I64 */ )
5171 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I64
);
5172 stmt( IRStmt_Put( OFFB_FC3210
, e
) );
5175 /* --------- Get/put the FPU rounding mode. --------- */
5176 static IRExpr
* /* :: Ity_I32 */ get_fpround ( void )
5178 return unop(Iop_64to32
, IRExpr_Get( OFFB_FPROUND
, Ity_I64
));
5181 static void put_fpround ( IRExpr
* /* :: Ity_I32 */ e
)
5183 vassert(typeOfIRExpr(irsb
->tyenv
, e
) == Ity_I32
);
5184 stmt( IRStmt_Put( OFFB_FPROUND
, unop(Iop_32Uto64
,e
) ) );
5188 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
5189 /* Produces a value in 0 .. 3, which is encoded as per the type
5190 IRRoundingMode. Since the guest_FPROUND value is also encoded as
5191 per IRRoundingMode, we merely need to get it and mask it for
5194 static IRExpr
* /* :: Ity_I32 */ get_roundingmode ( void )
5196 return binop( Iop_And32
, get_fpround(), mkU32(3) );
5199 static IRExpr
* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
5201 return mkU32(Irrm_NEAREST
);
5205 /* --------- Get/set FP register tag bytes. --------- */
5207 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
5209 static void put_ST_TAG ( Int i
, IRExpr
* value
)
5212 vassert(typeOfIRExpr(irsb
->tyenv
, value
) == Ity_I8
);
5213 descr
= mkIRRegArray( OFFB_FPTAGS
, Ity_I8
, 8 );
5214 stmt( IRStmt_PutI( mkIRPutI(descr
, get_ftop(), i
, value
) ) );
5217 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
5218 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
5220 static IRExpr
* get_ST_TAG ( Int i
)
5222 IRRegArray
* descr
= mkIRRegArray( OFFB_FPTAGS
, Ity_I8
, 8 );
5223 return IRExpr_GetI( descr
, get_ftop(), i
);
5227 /* --------- Get/set FP registers. --------- */
5229 /* Given i, and some expression e, emit 'ST(i) = e' and set the
5230 register's tag to indicate the register is full. The previous
5231 state of the register is not checked. */
5233 static void put_ST_UNCHECKED ( Int i
, IRExpr
* value
)
5236 vassert(typeOfIRExpr(irsb
->tyenv
, value
) == Ity_F64
);
5237 descr
= mkIRRegArray( OFFB_FPREGS
, Ity_F64
, 8 );
5238 stmt( IRStmt_PutI( mkIRPutI(descr
, get_ftop(), i
, value
) ) );
5239 /* Mark the register as in-use. */
5240 put_ST_TAG(i
, mkU8(1));
5243 /* Given i, and some expression e, emit
5244 ST(i) = is_full(i) ? NaN : e
5245 and set the tag accordingly.
5248 static void put_ST ( Int i
, IRExpr
* value
)
5252 IRExpr_ITE( binop(Iop_CmpNE8
, get_ST_TAG(i
), mkU8(0)),
5253 /* non-0 means full */
5262 /* Given i, generate an expression yielding 'ST(i)'. */
5264 static IRExpr
* get_ST_UNCHECKED ( Int i
)
5266 IRRegArray
* descr
= mkIRRegArray( OFFB_FPREGS
, Ity_F64
, 8 );
5267 return IRExpr_GetI( descr
, get_ftop(), i
);
5271 /* Given i, generate an expression yielding
5272 is_full(i) ? ST(i) : NaN
5275 static IRExpr
* get_ST ( Int i
)
5278 IRExpr_ITE( binop(Iop_CmpNE8
, get_ST_TAG(i
), mkU8(0)),
5279 /* non-0 means full */
5280 get_ST_UNCHECKED(i
),
5286 /* Given i, and some expression e, and a condition cond, generate IR
5287 which has the same effect as put_ST(i,e) when cond is true and has
5288 no effect when cond is false. Given the lack of proper
5289 if-then-else in the IR, this is pretty tricky.
5292 static void maybe_put_ST ( IRTemp cond
, Int i
, IRExpr
* value
)
5294 // new_tag = if cond then FULL else old_tag
5295 // new_val = if cond then (if old_tag==FULL then NaN else val)
5298 IRTemp old_tag
= newTemp(Ity_I8
);
5299 assign(old_tag
, get_ST_TAG(i
));
5300 IRTemp new_tag
= newTemp(Ity_I8
);
5302 IRExpr_ITE(mkexpr(cond
), mkU8(1)/*FULL*/, mkexpr(old_tag
)));
5304 IRTemp old_val
= newTemp(Ity_F64
);
5305 assign(old_val
, get_ST_UNCHECKED(i
));
5306 IRTemp new_val
= newTemp(Ity_F64
);
5308 IRExpr_ITE(mkexpr(cond
),
5309 IRExpr_ITE(binop(Iop_CmpNE8
, mkexpr(old_tag
), mkU8(0)),
5310 /* non-0 means full */
5316 put_ST_UNCHECKED(i
, mkexpr(new_val
));
5317 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
5318 // now set it to new_tag instead.
5319 put_ST_TAG(i
, mkexpr(new_tag
));
5322 /* Adjust FTOP downwards by one register. */
5324 static void fp_push ( void )
5326 put_ftop( binop(Iop_Sub32
, get_ftop(), mkU32(1)) );
5329 /* Adjust FTOP downwards by one register when COND is 1:I1. Else
5332 static void maybe_fp_push ( IRTemp cond
)
5334 put_ftop( binop(Iop_Sub32
, get_ftop(), unop(Iop_1Uto32
,mkexpr(cond
))) );
5337 /* Adjust FTOP upwards by one register, and mark the vacated register
5340 static void fp_pop ( void )
5342 put_ST_TAG(0, mkU8(0));
5343 put_ftop( binop(Iop_Add32
, get_ftop(), mkU32(1)) );
5346 /* Set the C2 bit of the FPU status register to e[0]. Assumes that
5349 static void set_C2 ( IRExpr
* e
)
5351 IRExpr
* cleared
= binop(Iop_And64
, get_C3210(), mkU64(~AMD64G_FC_MASK_C2
));
5352 put_C3210( binop(Iop_Or64
,
5354 binop(Iop_Shl64
, e
, mkU8(AMD64G_FC_SHIFT_C2
))) );
5357 /* Generate code to check that abs(d64) < 2^63 and is finite. This is
5358 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
5359 test is simple, but the derivation of it is not so simple.
5361 The exponent field for an IEEE754 double is 11 bits. That means it
5362 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
5363 the number is either a NaN or an Infinity and so is not finite.
5364 Furthermore, a finite value of exactly 2^63 is the smallest value
5365 that has exponent value 0x43E. Hence, what we need to do is
5366 extract the exponent, ignoring the sign bit and mantissa, and check
5367 it is < 0x43E, or <= 0x43D.
5369 To make this easily applicable to 32- and 64-bit targets, a
5370 roundabout approach is used. First the number is converted to I64,
5371 then the top 32 bits are taken. Shifting them right by 20 bits
5372 places the sign bit and exponent in the bottom 12 bits. Anding
5373 with 0x7FF gets rid of the sign bit, leaving just the exponent
5374 available for comparison.
5376 static IRTemp
math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64
)
5378 IRTemp i64
= newTemp(Ity_I64
);
5379 assign(i64
, unop(Iop_ReinterpF64asI64
, mkexpr(d64
)) );
5380 IRTemp exponent
= newTemp(Ity_I32
);
5383 binop(Iop_Shr32
, unop(Iop_64HIto32
, mkexpr(i64
)), mkU8(20)),
5385 IRTemp in_range_and_finite
= newTemp(Ity_I1
);
5386 assign(in_range_and_finite
,
5387 binop(Iop_CmpLE32U
, mkexpr(exponent
), mkU32(0x43D)));
5388 return in_range_and_finite
;
5391 /* Invent a plausible-looking FPU status word value:
5392 ((ftop & 7) << 11) | (c3210 & 0x4700)
5394 static IRExpr
* get_FPU_sw ( void )
5400 binop(Iop_And32
, get_ftop(), mkU32(7)),
5402 binop(Iop_And32
, unop(Iop_64to32
, get_C3210()),
5408 /* Generate a dirty helper call that initialises the x87 state a la
5409 FINIT. If |guard| is NULL, it is done unconditionally. Otherwise
5410 |guard| is used as a guarding condition.
5412 static void gen_FINIT_SEQUENCE ( IRExpr
* guard
)
5414 /* Uses dirty helper:
5415 void amd64g_do_FINIT ( VexGuestAMD64State* ) */
5416 IRDirty
* d
= unsafeIRDirty_0_N (
5418 "amd64g_dirtyhelper_FINIT",
5419 &amd64g_dirtyhelper_FINIT
,
5420 mkIRExprVec_1( IRExpr_GSPTR() )
5423 /* declare we're writing guest state */
5425 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
5427 d
->fxState
[0].fx
= Ifx_Write
;
5428 d
->fxState
[0].offset
= OFFB_FTOP
;
5429 d
->fxState
[0].size
= sizeof(UInt
);
5431 d
->fxState
[1].fx
= Ifx_Write
;
5432 d
->fxState
[1].offset
= OFFB_FPREGS
;
5433 d
->fxState
[1].size
= 8 * sizeof(ULong
);
5435 d
->fxState
[2].fx
= Ifx_Write
;
5436 d
->fxState
[2].offset
= OFFB_FPTAGS
;
5437 d
->fxState
[2].size
= 8 * sizeof(UChar
);
5439 d
->fxState
[3].fx
= Ifx_Write
;
5440 d
->fxState
[3].offset
= OFFB_FPROUND
;
5441 d
->fxState
[3].size
= sizeof(ULong
);
5443 d
->fxState
[4].fx
= Ifx_Write
;
5444 d
->fxState
[4].offset
= OFFB_FC3210
;
5445 d
->fxState
[4].size
= sizeof(ULong
);
5450 stmt( IRStmt_Dirty(d
) );
5454 /* ------------------------------------------------------- */
5455 /* Given all that stack-mangling junk, we can now go ahead
5456 and describe FP instructions.
5459 /* ST(0) = ST(0) `op` mem64/32(addr)
5460 Need to check ST(0)'s tag on read, but not on write.
5463 void fp_do_op_mem_ST_0 ( IRTemp addr
, const HChar
* op_txt
, HChar
* dis_buf
,
5466 DIP("f%s%c %s\n", op_txt
, dbl
?'l':'s', dis_buf
);
5470 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5472 loadLE(Ity_F64
,mkexpr(addr
))
5477 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5479 unop(Iop_F32toF64
, loadLE(Ity_F32
,mkexpr(addr
)))
5485 /* ST(0) = mem64/32(addr) `op` ST(0)
5486 Need to check ST(0)'s tag on read, but not on write.
5489 void fp_do_oprev_mem_ST_0 ( IRTemp addr
, const HChar
* op_txt
, HChar
* dis_buf
,
5492 DIP("f%s%c %s\n", op_txt
, dbl
?'l':'s', dis_buf
);
5496 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5497 loadLE(Ity_F64
,mkexpr(addr
)),
5503 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5504 unop(Iop_F32toF64
, loadLE(Ity_F32
,mkexpr(addr
))),
5511 /* ST(dst) = ST(dst) `op` ST(src).
5512 Check dst and src tags when reading but not on write.
5515 void fp_do_op_ST_ST ( const HChar
* op_txt
, IROp op
, UInt st_src
, UInt st_dst
,
5518 DIP("f%s%s st(%u), st(%u)\n", op_txt
, pop_after
?"p":"", st_src
, st_dst
);
5522 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5530 /* ST(dst) = ST(src) `op` ST(dst).
5531 Check dst and src tags when reading but not on write.
5534 void fp_do_oprev_ST_ST ( const HChar
* op_txt
, IROp op
, UInt st_src
, UInt st_dst
,
5537 DIP("f%s%s st(%u), st(%u)\n", op_txt
, pop_after
?"p":"", st_src
, st_dst
);
5541 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5549 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
5550 static void fp_do_ucomi_ST0_STi ( UInt i
, Bool pop_after
)
5552 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after
? "p" : "", i
);
5553 /* This is a bit of a hack (and isn't really right). It sets
5554 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
5555 documentation implies A and S are unchanged.
5557 /* It's also fishy in that it is used both for COMIP and
5558 UCOMIP, and they aren't the same (although similar). */
5559 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
5560 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
5565 binop(Iop_CmpF64
, get_ST(0), get_ST(i
))),
5568 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
5575 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
5577 static IRExpr
* x87ishly_qnarrow_32_to_16 ( IRExpr
* e32
)
5579 IRTemp t32
= newTemp(Ity_I32
);
5585 binop(Iop_Add32
, mkexpr(t32
), mkU32(32768))),
5587 unop(Iop_32to16
, mkexpr(t32
)),
5593 ULong
dis_FPU ( /*OUT*/Bool
* decode_ok
,
5594 const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
)
5601 /* On entry, delta points at the second byte of the insn (the modrm
5603 UChar first_opcode
= getUChar(delta
-1);
5604 UChar modrm
= getUChar(delta
+0);
5606 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
5608 if (first_opcode
== 0xD8) {
5611 /* bits 5,4,3 are an opcode extension, and the modRM also
5612 specifies an address. */
5613 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
5616 switch (gregLO3ofRM(modrm
)) {
5618 case 0: /* FADD single-real */
5619 fp_do_op_mem_ST_0 ( addr
, "add", dis_buf
, Iop_AddF64
, False
);
5622 case 1: /* FMUL single-real */
5623 fp_do_op_mem_ST_0 ( addr
, "mul", dis_buf
, Iop_MulF64
, False
);
5626 case 2: /* FCOM single-real */
5627 DIP("fcoms %s\n", dis_buf
);
5628 /* This forces C1 to zero, which isn't right. */
5629 /* The AMD documentation suggests that forcing C1 to
5630 zero is correct (Eliot Moss) */
5638 loadLE(Ity_F32
,mkexpr(addr
)))),
5644 case 3: /* FCOMP single-real */
5645 /* The AMD documentation suggests that forcing C1 to
5646 zero is correct (Eliot Moss) */
5647 DIP("fcomps %s\n", dis_buf
);
5648 /* This forces C1 to zero, which isn't right. */
5656 loadLE(Ity_F32
,mkexpr(addr
)))),
5663 case 4: /* FSUB single-real */
5664 fp_do_op_mem_ST_0 ( addr
, "sub", dis_buf
, Iop_SubF64
, False
);
5667 case 5: /* FSUBR single-real */
5668 fp_do_oprev_mem_ST_0 ( addr
, "subr", dis_buf
, Iop_SubF64
, False
);
5671 case 6: /* FDIV single-real */
5672 fp_do_op_mem_ST_0 ( addr
, "div", dis_buf
, Iop_DivF64
, False
);
5675 case 7: /* FDIVR single-real */
5676 fp_do_oprev_mem_ST_0 ( addr
, "divr", dis_buf
, Iop_DivF64
, False
);
5680 vex_printf("unhandled opc_aux = 0x%2x\n",
5681 (UInt
)gregLO3ofRM(modrm
));
5682 vex_printf("first_opcode == 0xD8\n");
5689 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
5690 fp_do_op_ST_ST ( "add", Iop_AddF64
, modrm
- 0xC0, 0, False
);
5693 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
5694 fp_do_op_ST_ST ( "mul", Iop_MulF64
, modrm
- 0xC8, 0, False
);
5697 /* Dunno if this is right */
5698 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
5699 r_dst
= (UInt
)modrm
- 0xD0;
5700 DIP("fcom %%st(0),%%st(%u)\n", r_dst
);
5701 /* This forces C1 to zero, which isn't right. */
5706 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
5712 /* Dunno if this is right */
5713 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
5714 r_dst
= (UInt
)modrm
- 0xD8;
5715 DIP("fcomp %%st(0),%%st(%u)\n", r_dst
);
5716 /* This forces C1 to zero, which isn't right. */
5721 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
5728 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
5729 fp_do_op_ST_ST ( "sub", Iop_SubF64
, modrm
- 0xE0, 0, False
);
5732 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
5733 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, modrm
- 0xE8, 0, False
);
5736 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
5737 fp_do_op_ST_ST ( "div", Iop_DivF64
, modrm
- 0xF0, 0, False
);
5740 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
5741 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, modrm
- 0xF8, 0, False
);
5750 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
5752 if (first_opcode
== 0xD9) {
5755 /* bits 5,4,3 are an opcode extension, and the modRM also
5756 specifies an address. */
5757 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
5760 switch (gregLO3ofRM(modrm
)) {
5762 case 0: /* FLD single-real */
5763 DIP("flds %s\n", dis_buf
);
5765 put_ST(0, unop(Iop_F32toF64
,
5766 loadLE(Ity_F32
, mkexpr(addr
))));
5769 case 2: /* FST single-real */
5770 DIP("fsts %s\n", dis_buf
);
5771 storeLE(mkexpr(addr
),
5772 binop(Iop_F64toF32
, get_roundingmode(), get_ST(0)));
5775 case 3: /* FSTP single-real */
5776 DIP("fstps %s\n", dis_buf
);
5777 storeLE(mkexpr(addr
),
5778 binop(Iop_F64toF32
, get_roundingmode(), get_ST(0)));
5782 case 4: { /* FLDENV m28 */
5783 /* Uses dirty helper:
5784 VexEmNote amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */
5785 IRTemp ew
= newTemp(Ity_I32
);
5786 IRTemp w64
= newTemp(Ity_I64
);
5787 IRDirty
* d
= unsafeIRDirty_0_N (
5789 "amd64g_dirtyhelper_FLDENV",
5790 &amd64g_dirtyhelper_FLDENV
,
5791 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
5794 /* declare we're reading memory */
5796 d
->mAddr
= mkexpr(addr
);
5799 /* declare we're writing guest state */
5801 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
5803 d
->fxState
[0].fx
= Ifx_Write
;
5804 d
->fxState
[0].offset
= OFFB_FTOP
;
5805 d
->fxState
[0].size
= sizeof(UInt
);
5807 d
->fxState
[1].fx
= Ifx_Write
;
5808 d
->fxState
[1].offset
= OFFB_FPTAGS
;
5809 d
->fxState
[1].size
= 8 * sizeof(UChar
);
5811 d
->fxState
[2].fx
= Ifx_Write
;
5812 d
->fxState
[2].offset
= OFFB_FPROUND
;
5813 d
->fxState
[2].size
= sizeof(ULong
);
5815 d
->fxState
[3].fx
= Ifx_Write
;
5816 d
->fxState
[3].offset
= OFFB_FC3210
;
5817 d
->fxState
[3].size
= sizeof(ULong
);
5819 stmt( IRStmt_Dirty(d
) );
5821 /* ew contains any emulation warning we may need to
5822 issue. If needed, side-exit to the next insn,
5823 reporting the warning, so that Valgrind's dispatcher
5824 sees the warning. */
5825 assign(ew
, unop(Iop_64to32
,mkexpr(w64
)) );
5826 put_emwarn( mkexpr(ew
) );
5829 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
5831 IRConst_U64( guest_RIP_bbstart
+delta
),
5836 DIP("fldenv %s\n", dis_buf
);
5840 case 5: {/* FLDCW */
5841 /* The only thing we observe in the control word is the
5842 rounding mode. Therefore, pass the 16-bit value
5843 (x87 native-format control word) to a clean helper,
5844 getting back a 64-bit value, the lower half of which
5845 is the FPROUND value to store, and the upper half of
5846 which is the emulation-warning token which may be
5849 /* ULong amd64h_check_fldcw ( ULong ); */
5850 IRTemp t64
= newTemp(Ity_I64
);
5851 IRTemp ew
= newTemp(Ity_I32
);
5852 DIP("fldcw %s\n", dis_buf
);
5853 assign( t64
, mkIRExprCCall(
5854 Ity_I64
, 0/*regparms*/,
5855 "amd64g_check_fldcw",
5856 &amd64g_check_fldcw
,
5859 loadLE(Ity_I16
, mkexpr(addr
)))
5864 put_fpround( unop(Iop_64to32
, mkexpr(t64
)) );
5865 assign( ew
, unop(Iop_64HIto32
, mkexpr(t64
) ) );
5866 put_emwarn( mkexpr(ew
) );
5867 /* Finally, if an emulation warning was reported,
5868 side-exit to the next insn, reporting the warning,
5869 so that Valgrind's dispatcher sees the warning. */
5872 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
5874 IRConst_U64( guest_RIP_bbstart
+delta
),
5881 case 6: { /* FNSTENV m28 */
5882 /* Uses dirty helper:
5883 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */
5884 IRDirty
* d
= unsafeIRDirty_0_N (
5886 "amd64g_dirtyhelper_FSTENV",
5887 &amd64g_dirtyhelper_FSTENV
,
5888 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
5890 /* declare we're writing memory */
5892 d
->mAddr
= mkexpr(addr
);
5895 /* declare we're reading guest state */
5897 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
5899 d
->fxState
[0].fx
= Ifx_Read
;
5900 d
->fxState
[0].offset
= OFFB_FTOP
;
5901 d
->fxState
[0].size
= sizeof(UInt
);
5903 d
->fxState
[1].fx
= Ifx_Read
;
5904 d
->fxState
[1].offset
= OFFB_FPTAGS
;
5905 d
->fxState
[1].size
= 8 * sizeof(UChar
);
5907 d
->fxState
[2].fx
= Ifx_Read
;
5908 d
->fxState
[2].offset
= OFFB_FPROUND
;
5909 d
->fxState
[2].size
= sizeof(ULong
);
5911 d
->fxState
[3].fx
= Ifx_Read
;
5912 d
->fxState
[3].offset
= OFFB_FC3210
;
5913 d
->fxState
[3].size
= sizeof(ULong
);
5915 stmt( IRStmt_Dirty(d
) );
5917 DIP("fnstenv %s\n", dis_buf
);
5921 case 7: /* FNSTCW */
5922 /* Fake up a native x87 FPU control word. The only
5923 thing it depends on is FPROUND[1:0], so call a clean
5924 helper to cook it up. */
5925 /* ULong amd64g_create_fpucw ( ULong fpround ) */
5926 DIP("fnstcw %s\n", dis_buf
);
5932 "amd64g_create_fpucw", &amd64g_create_fpucw
,
5933 mkIRExprVec_1( unop(Iop_32Uto64
, get_fpround()) )
5940 vex_printf("unhandled opc_aux = 0x%2x\n",
5941 (UInt
)gregLO3ofRM(modrm
));
5942 vex_printf("first_opcode == 0xD9\n");
5950 case 0xC0 ... 0xC7: /* FLD %st(?) */
5951 r_src
= (UInt
)modrm
- 0xC0;
5952 DIP("fld %%st(%u)\n", r_src
);
5953 t1
= newTemp(Ity_F64
);
5954 assign(t1
, get_ST(r_src
));
5956 put_ST(0, mkexpr(t1
));
5959 case 0xC8 ... 0xCF: /* FXCH %st(?) */
5960 r_src
= (UInt
)modrm
- 0xC8;
5961 DIP("fxch %%st(%u)\n", r_src
);
5962 t1
= newTemp(Ity_F64
);
5963 t2
= newTemp(Ity_F64
);
5964 assign(t1
, get_ST(0));
5965 assign(t2
, get_ST(r_src
));
5966 put_ST_UNCHECKED(0, mkexpr(t2
));
5967 put_ST_UNCHECKED(r_src
, mkexpr(t1
));
5970 case 0xE0: /* FCHS */
5972 put_ST_UNCHECKED(0, unop(Iop_NegF64
, get_ST(0)));
5975 case 0xE1: /* FABS */
5977 put_ST_UNCHECKED(0, unop(Iop_AbsF64
, get_ST(0)));
5980 case 0xE5: { /* FXAM */
5981 /* This is an interesting one. It examines %st(0),
5982 regardless of whether the tag says it's empty or not.
5983 Here, just pass both the tag (in our format) and the
5984 value (as a double, actually a ULong) to a helper
5987 = mkIRExprVec_2( unop(Iop_8Uto64
, get_ST_TAG(0)),
5988 unop(Iop_ReinterpF64asI64
,
5989 get_ST_UNCHECKED(0)) );
5990 put_C3210(mkIRExprCCall(
5993 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM
,
6000 case 0xE8: /* FLD1 */
6003 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
6004 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL
)));
6007 case 0xE9: /* FLDL2T */
6010 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
6011 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL
)));
6014 case 0xEA: /* FLDL2E */
6017 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
6018 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL
)));
6021 case 0xEB: /* FLDPI */
6024 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
6025 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL
)));
6028 case 0xEC: /* FLDLG2 */
6031 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
6032 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL
)));
6035 case 0xED: /* FLDLN2 */
6038 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
6039 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL
)));
6042 case 0xEE: /* FLDZ */
6045 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
6046 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL
)));
6049 case 0xF0: /* F2XM1 */
6053 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6057 case 0xF1: /* FYL2X */
6061 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6067 case 0xF2: { /* FPTAN */
6069 IRTemp argD
= newTemp(Ity_F64
);
6070 assign(argD
, get_ST(0));
6071 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
6072 IRTemp resD
= newTemp(Ity_F64
);
6077 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6081 put_ST_UNCHECKED(0, mkexpr(resD
));
6082 /* Conditionally push 1.0 on the stack, if the arg is
6084 maybe_fp_push(argOK
);
6085 maybe_put_ST(argOK
, 0,
6086 IRExpr_Const(IRConst_F64(1.0)));
6087 set_C2( binop(Iop_Xor64
,
6088 unop(Iop_1Uto64
, mkexpr(argOK
)),
6093 case 0xF3: /* FPATAN */
6097 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6103 case 0xF4: { /* FXTRACT */
6104 IRTemp argF
= newTemp(Ity_F64
);
6105 IRTemp sigF
= newTemp(Ity_F64
);
6106 IRTemp expF
= newTemp(Ity_F64
);
6107 IRTemp argI
= newTemp(Ity_I64
);
6108 IRTemp sigI
= newTemp(Ity_I64
);
6109 IRTemp expI
= newTemp(Ity_I64
);
6111 assign( argF
, get_ST(0) );
6112 assign( argI
, unop(Iop_ReinterpF64asI64
, mkexpr(argF
)));
6115 Ity_I64
, 0/*regparms*/,
6116 "x86amd64g_calculate_FXTRACT",
6117 &x86amd64g_calculate_FXTRACT
,
6118 mkIRExprVec_2( mkexpr(argI
),
6119 mkIRExpr_HWord(0)/*sig*/ ))
6123 Ity_I64
, 0/*regparms*/,
6124 "x86amd64g_calculate_FXTRACT",
6125 &x86amd64g_calculate_FXTRACT
,
6126 mkIRExprVec_2( mkexpr(argI
),
6127 mkIRExpr_HWord(1)/*exp*/ ))
6129 assign( sigF
, unop(Iop_ReinterpI64asF64
, mkexpr(sigI
)) );
6130 assign( expF
, unop(Iop_ReinterpI64asF64
, mkexpr(expI
)) );
6132 put_ST_UNCHECKED(0, mkexpr(expF
) );
6135 put_ST(0, mkexpr(sigF
) );
6139 case 0xF5: { /* FPREM1 -- IEEE compliant */
6140 IRTemp a1
= newTemp(Ity_F64
);
6141 IRTemp a2
= newTemp(Ity_F64
);
6143 /* Do FPREM1 twice, once to get the remainder, and once
6144 to get the C3210 flag values. */
6145 assign( a1
, get_ST(0) );
6146 assign( a2
, get_ST(1) );
6149 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6154 triop(Iop_PRem1C3210F64
,
6155 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6161 case 0xF7: /* FINCSTP */
6163 put_ftop( binop(Iop_Add32
, get_ftop(), mkU32(1)) );
6166 case 0xF8: { /* FPREM -- not IEEE compliant */
6167 IRTemp a1
= newTemp(Ity_F64
);
6168 IRTemp a2
= newTemp(Ity_F64
);
6170 /* Do FPREM twice, once to get the remainder, and once
6171 to get the C3210 flag values. */
6172 assign( a1
, get_ST(0) );
6173 assign( a2
, get_ST(1) );
6176 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6181 triop(Iop_PRemC3210F64
,
6182 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6188 case 0xF9: /* FYL2XP1 */
6191 triop(Iop_Yl2xp1F64
,
6192 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6198 case 0xFA: /* FSQRT */
6202 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6206 case 0xFB: { /* FSINCOS */
6208 IRTemp argD
= newTemp(Ity_F64
);
6209 assign(argD
, get_ST(0));
6210 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
6211 IRTemp resD
= newTemp(Ity_F64
);
6216 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6220 put_ST_UNCHECKED(0, mkexpr(resD
));
6221 /* Conditionally push the cos value on the stack, if
6222 the arg is in range */
6223 maybe_fp_push(argOK
);
6224 maybe_put_ST(argOK
, 0,
6226 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6228 set_C2( binop(Iop_Xor64
,
6229 unop(Iop_1Uto64
, mkexpr(argOK
)),
6234 case 0xFC: /* FRNDINT */
6237 binop(Iop_RoundF64toInt
, get_roundingmode(), get_ST(0)) );
6240 case 0xFD: /* FSCALE */
6244 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6249 case 0xFE: /* FSIN */
6250 case 0xFF: { /* FCOS */
6251 Bool isSIN
= modrm
== 0xFE;
6252 DIP("%s\n", isSIN
? "fsin" : "fcos");
6253 IRTemp argD
= newTemp(Ity_F64
);
6254 assign(argD
, get_ST(0));
6255 IRTemp argOK
= math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD
);
6256 IRTemp resD
= newTemp(Ity_F64
);
6260 binop(isSIN
? Iop_SinF64
: Iop_CosF64
,
6261 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6265 put_ST_UNCHECKED(0, mkexpr(resD
));
6266 set_C2( binop(Iop_Xor64
,
6267 unop(Iop_1Uto64
, mkexpr(argOK
)),
6278 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
6280 if (first_opcode
== 0xDA) {
6284 /* bits 5,4,3 are an opcode extension, and the modRM also
6285 specifies an address. */
6287 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6289 switch (gregLO3ofRM(modrm
)) {
6291 case 0: /* FIADD m32int */ /* ST(0) += m32int */
6292 DIP("fiaddl %s\n", dis_buf
);
6296 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
6297 DIP("fimull %s\n", dis_buf
);
6301 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
6302 DIP("fisubl %s\n", dis_buf
);
6306 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
6307 DIP("fisubrl %s\n", dis_buf
);
6311 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
6312 DIP("fisubl %s\n", dis_buf
);
6316 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
6317 DIP("fidivrl %s\n", dis_buf
);
6324 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6327 loadLE(Ity_I32
, mkexpr(addr
)))));
6333 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6335 loadLE(Ity_I32
, mkexpr(addr
))),
6340 vex_printf("unhandled opc_aux = 0x%2x\n",
6341 (UInt
)gregLO3ofRM(modrm
));
6342 vex_printf("first_opcode == 0xDA\n");
6351 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
6352 r_src
= (UInt
)modrm
- 0xC0;
6353 DIP("fcmovb %%st(%u), %%st(0)\n", r_src
);
6356 mk_amd64g_calculate_condition(AMD64CondB
),
6357 get_ST(r_src
), get_ST(0)) );
6360 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
6361 r_src
= (UInt
)modrm
- 0xC8;
6362 DIP("fcmovz %%st(%u), %%st(0)\n", r_src
);
6365 mk_amd64g_calculate_condition(AMD64CondZ
),
6366 get_ST(r_src
), get_ST(0)) );
6369 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
6370 r_src
= (UInt
)modrm
- 0xD0;
6371 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src
);
6374 mk_amd64g_calculate_condition(AMD64CondBE
),
6375 get_ST(r_src
), get_ST(0)) );
6378 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
6379 r_src
= (UInt
)modrm
- 0xD8;
6380 DIP("fcmovu %%st(%u), %%st(0)\n", r_src
);
6383 mk_amd64g_calculate_condition(AMD64CondP
),
6384 get_ST(r_src
), get_ST(0)) );
6387 case 0xE9: /* FUCOMPP %st(0),%st(1) */
6388 DIP("fucompp %%st(0),%%st(1)\n");
6389 /* This forces C1 to zero, which isn't right. */
6394 binop(Iop_CmpF64
, get_ST(0), get_ST(1)),
6409 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
6411 if (first_opcode
== 0xDB) {
6414 /* bits 5,4,3 are an opcode extension, and the modRM also
6415 specifies an address. */
6416 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6419 switch (gregLO3ofRM(modrm
)) {
6421 case 0: /* FILD m32int */
6422 DIP("fildl %s\n", dis_buf
);
6424 put_ST(0, unop(Iop_I32StoF64
,
6425 loadLE(Ity_I32
, mkexpr(addr
))));
6428 case 1: /* FISTTPL m32 (SSE3) */
6429 DIP("fisttpl %s\n", dis_buf
);
6430 storeLE( mkexpr(addr
),
6431 binop(Iop_F64toI32S
, mkU32(Irrm_ZERO
), get_ST(0)) );
6435 case 2: /* FIST m32 */
6436 DIP("fistl %s\n", dis_buf
);
6437 storeLE( mkexpr(addr
),
6438 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) );
6441 case 3: /* FISTP m32 */
6442 DIP("fistpl %s\n", dis_buf
);
6443 storeLE( mkexpr(addr
),
6444 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) );
6448 case 5: { /* FLD extended-real */
6449 /* Uses dirty helper:
6450 ULong amd64g_loadF80le ( ULong )
6451 addr holds the address. First, do a dirty call to
6452 get hold of the data. */
6453 IRTemp val
= newTemp(Ity_I64
);
6454 IRExpr
** args
= mkIRExprVec_1 ( mkexpr(addr
) );
6456 IRDirty
* d
= unsafeIRDirty_1_N (
6459 "amd64g_dirtyhelper_loadF80le",
6460 &amd64g_dirtyhelper_loadF80le
,
6463 /* declare that we're reading memory */
6465 d
->mAddr
= mkexpr(addr
);
6468 /* execute the dirty call, dumping the result in val. */
6469 stmt( IRStmt_Dirty(d
) );
6471 put_ST(0, unop(Iop_ReinterpI64asF64
, mkexpr(val
)));
6473 DIP("fldt %s\n", dis_buf
);
6477 case 7: { /* FSTP extended-real */
6478 /* Uses dirty helper:
6479 void amd64g_storeF80le ( ULong addr, ULong data )
6482 = mkIRExprVec_2( mkexpr(addr
),
6483 unop(Iop_ReinterpF64asI64
, get_ST(0)) );
6485 IRDirty
* d
= unsafeIRDirty_0_N (
6487 "amd64g_dirtyhelper_storeF80le",
6488 &amd64g_dirtyhelper_storeF80le
,
6491 /* declare we're writing memory */
6493 d
->mAddr
= mkexpr(addr
);
6496 /* execute the dirty call. */
6497 stmt( IRStmt_Dirty(d
) );
6500 DIP("fstpt\n %s", dis_buf
);
6505 vex_printf("unhandled opc_aux = 0x%2x\n",
6506 (UInt
)gregLO3ofRM(modrm
));
6507 vex_printf("first_opcode == 0xDB\n");
6516 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
6517 r_src
= (UInt
)modrm
- 0xC0;
6518 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src
);
6521 mk_amd64g_calculate_condition(AMD64CondNB
),
6522 get_ST(r_src
), get_ST(0)) );
6525 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
6526 r_src
= (UInt
)modrm
- 0xC8;
6527 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src
);
6531 mk_amd64g_calculate_condition(AMD64CondNZ
),
6538 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
6539 r_src
= (UInt
)modrm
- 0xD0;
6540 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src
);
6544 mk_amd64g_calculate_condition(AMD64CondNBE
),
6551 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
6552 r_src
= (UInt
)modrm
- 0xD8;
6553 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src
);
6557 mk_amd64g_calculate_condition(AMD64CondNP
),
6569 gen_FINIT_SEQUENCE(NULL
/*no guarding condition*/);
6574 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
6575 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xE8, False
);
6578 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
6579 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xF0, False
);
6588 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
6590 if (first_opcode
== 0xDC) {
6593 /* bits 5,4,3 are an opcode extension, and the modRM also
6594 specifies an address. */
6595 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6598 switch (gregLO3ofRM(modrm
)) {
6600 case 0: /* FADD double-real */
6601 fp_do_op_mem_ST_0 ( addr
, "add", dis_buf
, Iop_AddF64
, True
);
6604 case 1: /* FMUL double-real */
6605 fp_do_op_mem_ST_0 ( addr
, "mul", dis_buf
, Iop_MulF64
, True
);
6608 case 2: /* FCOM double-real */
6609 DIP("fcoml %s\n", dis_buf
);
6610 /* This forces C1 to zero, which isn't right. */
6617 loadLE(Ity_F64
,mkexpr(addr
))),
6623 case 3: /* FCOMP double-real */
6624 DIP("fcompl %s\n", dis_buf
);
6625 /* This forces C1 to zero, which isn't right. */
6632 loadLE(Ity_F64
,mkexpr(addr
))),
6639 case 4: /* FSUB double-real */
6640 fp_do_op_mem_ST_0 ( addr
, "sub", dis_buf
, Iop_SubF64
, True
);
6643 case 5: /* FSUBR double-real */
6644 fp_do_oprev_mem_ST_0 ( addr
, "subr", dis_buf
, Iop_SubF64
, True
);
6647 case 6: /* FDIV double-real */
6648 fp_do_op_mem_ST_0 ( addr
, "div", dis_buf
, Iop_DivF64
, True
);
6651 case 7: /* FDIVR double-real */
6652 fp_do_oprev_mem_ST_0 ( addr
, "divr", dis_buf
, Iop_DivF64
, True
);
6656 vex_printf("unhandled opc_aux = 0x%2x\n",
6657 (UInt
)gregLO3ofRM(modrm
));
6658 vex_printf("first_opcode == 0xDC\n");
6667 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
6668 fp_do_op_ST_ST ( "add", Iop_AddF64
, 0, modrm
- 0xC0, False
);
6671 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
6672 fp_do_op_ST_ST ( "mul", Iop_MulF64
, 0, modrm
- 0xC8, False
);
6675 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
6676 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, 0, modrm
- 0xE0, False
);
6679 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
6680 fp_do_op_ST_ST ( "sub", Iop_SubF64
, 0, modrm
- 0xE8, False
);
6683 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
6684 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, 0, modrm
- 0xF0, False
);
6687 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
6688 fp_do_op_ST_ST ( "div", Iop_DivF64
, 0, modrm
- 0xF8, False
);
6698 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
6700 if (first_opcode
== 0xDD) {
6704 /* bits 5,4,3 are an opcode extension, and the modRM also
6705 specifies an address. */
6706 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6709 switch (gregLO3ofRM(modrm
)) {
6711 case 0: /* FLD double-real */
6712 DIP("fldl %s\n", dis_buf
);
6714 put_ST(0, loadLE(Ity_F64
, mkexpr(addr
)));
6717 case 1: /* FISTTPQ m64 (SSE3) */
6718 DIP("fistppll %s\n", dis_buf
);
6719 storeLE( mkexpr(addr
),
6720 binop(Iop_F64toI64S
, mkU32(Irrm_ZERO
), get_ST(0)) );
6724 case 2: /* FST double-real */
6725 DIP("fstl %s\n", dis_buf
);
6726 storeLE(mkexpr(addr
), get_ST(0));
6729 case 3: /* FSTP double-real */
6730 DIP("fstpl %s\n", dis_buf
);
6731 storeLE(mkexpr(addr
), get_ST(0));
6735 case 4: { /* FRSTOR m94/m108 */
6736 IRTemp ew
= newTemp(Ity_I32
);
6737 IRTemp w64
= newTemp(Ity_I64
);
6739 if ( have66(pfx
) ) {
6740 /* Uses dirty helper:
6741 VexEmNote amd64g_dirtyhelper_FRSTORS
6742 ( VexGuestAMD64State*, HWord ) */
6743 d
= unsafeIRDirty_0_N (
6745 "amd64g_dirtyhelper_FRSTORS",
6746 &amd64g_dirtyhelper_FRSTORS
,
6747 mkIRExprVec_1( mkexpr(addr
) )
6751 /* Uses dirty helper:
6752 VexEmNote amd64g_dirtyhelper_FRSTOR
6753 ( VexGuestAMD64State*, HWord ) */
6754 d
= unsafeIRDirty_0_N (
6756 "amd64g_dirtyhelper_FRSTOR",
6757 &amd64g_dirtyhelper_FRSTOR
,
6758 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
6764 /* declare we're reading memory */
6766 d
->mAddr
= mkexpr(addr
);
6767 /* d->mSize set above */
6769 /* declare we're writing guest state */
6771 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
6773 d
->fxState
[0].fx
= Ifx_Write
;
6774 d
->fxState
[0].offset
= OFFB_FTOP
;
6775 d
->fxState
[0].size
= sizeof(UInt
);
6777 d
->fxState
[1].fx
= Ifx_Write
;
6778 d
->fxState
[1].offset
= OFFB_FPREGS
;
6779 d
->fxState
[1].size
= 8 * sizeof(ULong
);
6781 d
->fxState
[2].fx
= Ifx_Write
;
6782 d
->fxState
[2].offset
= OFFB_FPTAGS
;
6783 d
->fxState
[2].size
= 8 * sizeof(UChar
);
6785 d
->fxState
[3].fx
= Ifx_Write
;
6786 d
->fxState
[3].offset
= OFFB_FPROUND
;
6787 d
->fxState
[3].size
= sizeof(ULong
);
6789 d
->fxState
[4].fx
= Ifx_Write
;
6790 d
->fxState
[4].offset
= OFFB_FC3210
;
6791 d
->fxState
[4].size
= sizeof(ULong
);
6793 stmt( IRStmt_Dirty(d
) );
6795 /* ew contains any emulation warning we may need to
6796 issue. If needed, side-exit to the next insn,
6797 reporting the warning, so that Valgrind's dispatcher
6798 sees the warning. */
6799 assign(ew
, unop(Iop_64to32
,mkexpr(w64
)) );
6800 put_emwarn( mkexpr(ew
) );
6803 binop(Iop_CmpNE32
, mkexpr(ew
), mkU32(0)),
6805 IRConst_U64( guest_RIP_bbstart
+delta
),
6810 if ( have66(pfx
) ) {
6811 DIP("frstors %s\n", dis_buf
);
6813 DIP("frstor %s\n", dis_buf
);
6818 case 6: { /* FNSAVE m94/m108 */
6820 if ( have66(pfx
) ) {
6821 /* Uses dirty helper:
6822 void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*,
6824 d
= unsafeIRDirty_0_N (
6826 "amd64g_dirtyhelper_FNSAVES",
6827 &amd64g_dirtyhelper_FNSAVES
,
6828 mkIRExprVec_1( mkexpr(addr
) )
6832 /* Uses dirty helper:
6833 void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*,
6835 d
= unsafeIRDirty_0_N (
6837 "amd64g_dirtyhelper_FNSAVE",
6838 &amd64g_dirtyhelper_FNSAVE
,
6839 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
6844 /* declare we're writing memory */
6846 d
->mAddr
= mkexpr(addr
);
6847 /* d->mSize set above */
6849 /* declare we're reading guest state */
6851 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
6853 d
->fxState
[0].fx
= Ifx_Read
;
6854 d
->fxState
[0].offset
= OFFB_FTOP
;
6855 d
->fxState
[0].size
= sizeof(UInt
);
6857 d
->fxState
[1].fx
= Ifx_Read
;
6858 d
->fxState
[1].offset
= OFFB_FPREGS
;
6859 d
->fxState
[1].size
= 8 * sizeof(ULong
);
6861 d
->fxState
[2].fx
= Ifx_Read
;
6862 d
->fxState
[2].offset
= OFFB_FPTAGS
;
6863 d
->fxState
[2].size
= 8 * sizeof(UChar
);
6865 d
->fxState
[3].fx
= Ifx_Read
;
6866 d
->fxState
[3].offset
= OFFB_FPROUND
;
6867 d
->fxState
[3].size
= sizeof(ULong
);
6869 d
->fxState
[4].fx
= Ifx_Read
;
6870 d
->fxState
[4].offset
= OFFB_FC3210
;
6871 d
->fxState
[4].size
= sizeof(ULong
);
6873 stmt( IRStmt_Dirty(d
) );
6875 if ( have66(pfx
) ) {
6876 DIP("fnsaves %s\n", dis_buf
);
6878 DIP("fnsave %s\n", dis_buf
);
6883 case 7: { /* FNSTSW m16 */
6884 IRExpr
* sw
= get_FPU_sw();
6885 vassert(typeOfIRExpr(irsb
->tyenv
, sw
) == Ity_I16
);
6886 storeLE( mkexpr(addr
), sw
);
6887 DIP("fnstsw %s\n", dis_buf
);
6892 vex_printf("unhandled opc_aux = 0x%2x\n",
6893 (UInt
)gregLO3ofRM(modrm
));
6894 vex_printf("first_opcode == 0xDD\n");
6901 case 0xC0 ... 0xC7: /* FFREE %st(?) */
6902 r_dst
= (UInt
)modrm
- 0xC0;
6903 DIP("ffree %%st(%u)\n", r_dst
);
6904 put_ST_TAG ( r_dst
, mkU8(0) );
6907 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
6908 r_dst
= (UInt
)modrm
- 0xD0;
6909 DIP("fst %%st(0),%%st(%u)\n", r_dst
);
6910 /* P4 manual says: "If the destination operand is a
6911 non-empty register, the invalid-operation exception
6912 is not generated. Hence put_ST_UNCHECKED. */
6913 put_ST_UNCHECKED(r_dst
, get_ST(0));
6916 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
6917 r_dst
= (UInt
)modrm
- 0xD8;
6918 DIP("fstp %%st(0),%%st(%u)\n", r_dst
);
6919 /* P4 manual says: "If the destination operand is a
6920 non-empty register, the invalid-operation exception
6921 is not generated. Hence put_ST_UNCHECKED. */
6922 put_ST_UNCHECKED(r_dst
, get_ST(0));
6926 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
6927 r_dst
= (UInt
)modrm
- 0xE0;
6928 DIP("fucom %%st(0),%%st(%u)\n", r_dst
);
6929 /* This forces C1 to zero, which isn't right. */
6934 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
6940 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
6941 r_dst
= (UInt
)modrm
- 0xE8;
6942 DIP("fucomp %%st(0),%%st(%u)\n", r_dst
);
6943 /* This forces C1 to zero, which isn't right. */
6948 binop(Iop_CmpF64
, get_ST(0), get_ST(r_dst
)),
6961 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
6963 if (first_opcode
== 0xDE) {
6967 /* bits 5,4,3 are an opcode extension, and the modRM also
6968 specifies an address. */
6970 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
6973 switch (gregLO3ofRM(modrm
)) {
6975 case 0: /* FIADD m16int */ /* ST(0) += m16int */
6976 DIP("fiaddw %s\n", dis_buf
);
6980 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
6981 DIP("fimulw %s\n", dis_buf
);
6985 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
6986 DIP("fisubw %s\n", dis_buf
);
6990 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
6991 DIP("fisubrw %s\n", dis_buf
);
6995 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
6996 DIP("fisubw %s\n", dis_buf
);
7000 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
7001 DIP("fidivrw %s\n", dis_buf
);
7008 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
7012 loadLE(Ity_I16
, mkexpr(addr
))))));
7018 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
7021 loadLE(Ity_I16
, mkexpr(addr
)))),
7026 vex_printf("unhandled opc_aux = 0x%2x\n",
7027 (UInt
)gregLO3ofRM(modrm
));
7028 vex_printf("first_opcode == 0xDE\n");
7037 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
7038 fp_do_op_ST_ST ( "add", Iop_AddF64
, 0, modrm
- 0xC0, True
);
7041 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
7042 fp_do_op_ST_ST ( "mul", Iop_MulF64
, 0, modrm
- 0xC8, True
);
7045 case 0xD9: /* FCOMPP %st(0),%st(1) */
7046 DIP("fcompp %%st(0),%%st(1)\n");
7047 /* This forces C1 to zero, which isn't right. */
7052 binop(Iop_CmpF64
, get_ST(0), get_ST(1)),
7060 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
7061 fp_do_oprev_ST_ST ( "subr", Iop_SubF64
, 0, modrm
- 0xE0, True
);
7064 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
7065 fp_do_op_ST_ST ( "sub", Iop_SubF64
, 0, modrm
- 0xE8, True
);
7068 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
7069 fp_do_oprev_ST_ST ( "divr", Iop_DivF64
, 0, modrm
- 0xF0, True
);
7072 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
7073 fp_do_op_ST_ST ( "div", Iop_DivF64
, 0, modrm
- 0xF8, True
);
7083 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
7085 if (first_opcode
== 0xDF) {
7089 /* bits 5,4,3 are an opcode extension, and the modRM also
7090 specifies an address. */
7091 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7094 switch (gregLO3ofRM(modrm
)) {
7096 case 0: /* FILD m16int */
7097 DIP("fildw %s\n", dis_buf
);
7099 put_ST(0, unop(Iop_I32StoF64
,
7101 loadLE(Ity_I16
, mkexpr(addr
)))));
7104 case 1: /* FISTTPS m16 (SSE3) */
7105 DIP("fisttps %s\n", dis_buf
);
7106 storeLE( mkexpr(addr
),
7107 x87ishly_qnarrow_32_to_16(
7108 binop(Iop_F64toI32S
, mkU32(Irrm_ZERO
), get_ST(0)) ));
7112 case 2: /* FIST m16 */
7113 DIP("fists %s\n", dis_buf
);
7114 storeLE( mkexpr(addr
),
7115 x87ishly_qnarrow_32_to_16(
7116 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) ));
7119 case 3: /* FISTP m16 */
7120 DIP("fistps %s\n", dis_buf
);
7121 storeLE( mkexpr(addr
),
7122 x87ishly_qnarrow_32_to_16(
7123 binop(Iop_F64toI32S
, get_roundingmode(), get_ST(0)) ));
7127 case 5: /* FILD m64 */
7128 DIP("fildll %s\n", dis_buf
);
7130 put_ST(0, binop(Iop_I64StoF64
,
7132 loadLE(Ity_I64
, mkexpr(addr
))));
7135 case 7: /* FISTP m64 */
7136 DIP("fistpll %s\n", dis_buf
);
7137 storeLE( mkexpr(addr
),
7138 binop(Iop_F64toI64S
, get_roundingmode(), get_ST(0)) );
7143 vex_printf("unhandled opc_aux = 0x%2x\n",
7144 (UInt
)gregLO3ofRM(modrm
));
7145 vex_printf("first_opcode == 0xDF\n");
7154 case 0xC0: /* FFREEP %st(0) */
7155 DIP("ffreep %%st(%d)\n", 0);
7156 put_ST_TAG ( 0, mkU8(0) );
7160 case 0xE0: /* FNSTSW %ax */
7161 DIP("fnstsw %%ax\n");
7162 /* Invent a plausible-looking FPU status word value and
7164 ((ftop & 7) << 11) | (c3210 & 0x4700)
7171 binop(Iop_And32
, get_ftop(), mkU32(7)),
7174 unop(Iop_64to32
, get_C3210()),
7179 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
7180 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xE8, True
);
7183 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
7184 /* not really right since COMIP != UCOMIP */
7185 fp_do_ucomi_ST0_STi( (UInt
)modrm
- 0xF0, True
);
7207 /*------------------------------------------------------------*/
7209 /*--- MMX INSTRUCTIONS ---*/
7211 /*------------------------------------------------------------*/
7213 /* Effect of MMX insns on x87 FPU state (table 11-2 of
7214 IA32 arch manual, volume 3):
7216 Read from, or write to MMX register (viz, any insn except EMMS):
7217 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
7218 * FP stack pointer set to zero
7221 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
7222 * FP stack pointer set to zero
/* Put the x87/MMX unit into "MMX mode": per the table above, every
   FP tag is set to Valid (non-empty).  Writes tag value 1 into all 8
   FPTAGS entries via PutI ('descr' describes the 8 x I8 tag array;
   the index expression is 'zero' with bias 'i', so entry i is
   written).
   NOTE(review): the extraction appears to have dropped lines here --
   neither the declaration of the loop index 'i' nor the FP-stack-
   pointer reset promised by the table comment above is visible.
   Verify against the upstream source. */
static void do_MMX_preamble ( void )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag1  = mkU8(1);   /* tag value 1 == non-empty/valid */
   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
}
/* Model EMMS: per the table above, every FP tag is set to Invalid
   (empty).  Writes tag value 0 into all 8 FPTAGS entries via PutI
   (index expression 'zero' with bias 'i', so entry i is written).
   NOTE(review): the extraction appears to have dropped lines here --
   neither the declaration of the loop index 'i' nor the FP-stack-
   pointer reset promised by the table comment above is visible.
   Verify against the upstream source. */
static void do_EMMS_preamble ( void )
{
   IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
   IRExpr*     zero  = mkU32(0);
   IRExpr*     tag0  = mkU8(0);   /* tag value 0 == empty/invalid */
   for (i = 0; i < 8; i++)
      stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
}
7248 static IRExpr
* getMMXReg ( UInt archreg
)
7250 vassert(archreg
< 8);
7251 return IRExpr_Get( OFFB_FPREGS
+ 8 * archreg
, Ity_I64
);
7255 static void putMMXReg ( UInt archreg
, IRExpr
* e
)
7257 vassert(archreg
< 8);
7258 vassert(typeOfIRExpr(irsb
->tyenv
,e
) == Ity_I64
);
7259 stmt( IRStmt_Put( OFFB_FPREGS
+ 8 * archreg
, e
) );
7263 /* Helper for non-shift MMX insns. Note this is incomplete in the
7264 sense that it does not first call do_MMX_preamble() -- that is the
7265 responsibility of its caller. */
/* Decode and translate a non-shift MMX "G = G op E" instruction,
   where E is an MMX register or 64-bit memory operand and G is an
   MMX register.  Each opcode maps either to a native 64-bit SIMD
   IROp, or (for the XXX cases: PMADDWD, PSADBW) to a clean C helper
   called via mkIRExprCCall.
   NOTE(review): this is an incomplete extraction.  Not visible:
   the 'static' qualifier and the remaining parameters (at least the
   prefix/delta/name arguments used below), declarations of 'hAddr',
   'eLeft', 'invG', 'opc', 'len', 'dis_buf', the 'switch (opc)'
   header, the eLeft-driven selection of argL/argR from argE/argG,
   and parts of the ccall construction.  Tokens are reproduced as
   extracted, with gaps marked; verify against upstream source. */
ULong dis_MMXop_regmem_to_reg ( const VexAbiInfo* vbi,
                                /* ...gap: further parameters... */
                                Bool show_granularity )
{
   UChar        modrm = getUChar(delta);
   Bool         isReg = epartIsReg(modrm);
   IRExpr*      argL  = NULL;  /* left operand of op/helper */
   IRExpr*      argR  = NULL;  /* right operand of op/helper */
   IRExpr*      argG  = NULL;  /* current value of the G register */
   IRExpr*      argE  = NULL;  /* value of the E (reg or mem) operand */
   IRTemp       res   = newTemp(Ity_I64);
   IROp         op    = Iop_INVALID;
   const HChar* hName = NULL;  /* non-NULL iff a C helper is used */

   /* Select a clean helper (records both its address and its name)
      instead of a native IROp. */
#  define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)

   /* gap: switch (opc) { */
      /* Original MMX ones */
      case 0xFC: op = Iop_Add8x8; break;
      case 0xFD: op = Iop_Add16x4; break;
      case 0xFE: op = Iop_Add32x2; break;

      case 0xEC: op = Iop_QAdd8Sx8; break;
      case 0xED: op = Iop_QAdd16Sx4; break;

      case 0xDC: op = Iop_QAdd8Ux8; break;
      case 0xDD: op = Iop_QAdd16Ux4; break;

      case 0xF8: op = Iop_Sub8x8; break;
      case 0xF9: op = Iop_Sub16x4; break;
      case 0xFA: op = Iop_Sub32x2; break;

      case 0xE8: op = Iop_QSub8Sx8; break;
      case 0xE9: op = Iop_QSub16Sx4; break;

      case 0xD8: op = Iop_QSub8Ux8; break;
      case 0xD9: op = Iop_QSub16Ux4; break;

      case 0xE5: op = Iop_MulHi16Sx4; break;
      case 0xD5: op = Iop_Mul16x4; break;
      case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;

      case 0x74: op = Iop_CmpEQ8x8; break;
      case 0x75: op = Iop_CmpEQ16x4; break;
      case 0x76: op = Iop_CmpEQ32x2; break;

      case 0x64: op = Iop_CmpGT8Sx8; break;
      case 0x65: op = Iop_CmpGT16Sx4; break;
      case 0x66: op = Iop_CmpGT32Sx2; break;

      /* eLeft cases: E supplies the left operand of the IROp */
      case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
      case 0x63: op = Iop_QNarrowBin16Sto8Sx8;  eLeft = True; break;
      case 0x67: op = Iop_QNarrowBin16Sto8Ux8;  eLeft = True; break;

      case 0x68: op = Iop_InterleaveHI8x8;  eLeft = True; break;
      case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
      case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;

      case 0x60: op = Iop_InterleaveLO8x8;  eLeft = True; break;
      case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
      case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;

      case 0xDB: op = Iop_And64; break;
      case 0xDF: op = Iop_And64; invG = True; break;  /* PANDN: ~G & E */
      case 0xEB: op = Iop_Or64; break;
      case 0xEF: /* Possibly do better here if argL and argR are the
                    same reg */
                 op = Iop_Xor64; break;

      /* Introduced in SSE1 */
      case 0xE0: op = Iop_Avg8Ux8; break;
      case 0xE3: op = Iop_Avg16Ux4; break;
      case 0xEE: op = Iop_Max16Sx4; break;
      case 0xDE: op = Iop_Max8Ux8; break;
      case 0xEA: op = Iop_Min16Sx4; break;
      case 0xDA: op = Iop_Min8Ux8; break;
      case 0xE4: op = Iop_MulHi16Ux4; break;
      case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;

      /* Introduced in SSE2 */
      case 0xD4: op = Iop_Add64; break;
      case 0xFB: op = Iop_Sub64; break;

      default:
         vex_printf("\n0x%x\n", (UInt)opc);
         vpanic("dis_MMXop_regmem_to_reg");
   /* gap: } -- end of switch (opc) */

   argG = getMMXReg(gregLO3ofRM(modrm));
   /* gap: if (invG) -- complement G first (PANDN) */
      argG = unop(Iop_Not64, argG);

   /* gap: if (isReg) -- E is an MMX register */
      argE = getMMXReg(eregLO3ofRM(modrm));
   /* gap: else -- E is a 64-bit memory operand */
      IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
      argE = loadLE(Ity_I64, mkexpr(addr));

   /* gap: argL/argR are chosen from argE/argG according to eLeft */

   if (op != Iop_INVALID) {
      /* native IROp path */
      vassert(hName == NULL);
      vassert(hAddr == NULL);
      assign(res, binop(op, argL, argR));
   }
   /* gap: else -- clean-helper path */
      vassert(hName != NULL);
      vassert(hAddr != NULL);
      /* gap: assign(res, mkIRExprCCall( Ity_I64, ... */
                   0/*regparms*/, hName, hAddr,
                   mkIRExprVec_2( argL, argR )
      /* gap: ... )); */

   putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );

   DIP("%s%s %s, %s\n",
       name, show_granularity ? nameMMXGran(opc & 3) : "",
       ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
       nameMMXReg(gregLO3ofRM(modrm)) );
   /* gap: return of the updated delta not visible in extraction */
}
7415 /* Vector by scalar shift of G by the amount specified at the bottom
7416 of E. This is a straight copy of dis_SSE_shiftG_byE. */
/* Shift each lane of MMX register G by a scalar amount taken from the
   low 64 bits of E (register or memory).  For logical shifts (shl/shr)
   an amount >= lane size yields zero; for arithmetic shifts (sar) the
   amount is clamped to size-1.
   NOTE(review): incomplete extraction.  Not visible: declarations of
   'addr', 'alen', 'shl'/'shr'/'sar', 'size', the else-arm header for
   the memory case, the dis_buf argument of the second DIP, the
   'switch (op)' header, and the IRExpr_ITE/assign glue producing g1.
   Tokens reproduced as extracted, gaps marked; verify upstream. */
static ULong dis_MMX_shiftG_byE ( const VexAbiInfo* vbi,
                                  Prefix pfx, Long delta,
                                  const HChar* opname, IROp op )
{
   UChar  rm   = getUChar(delta);
   IRTemp g0   = newTemp(Ity_I64);   /* G value before the shift */
   IRTemp g1   = newTemp(Ity_I64);   /* shifted result */
   IRTemp amt  = newTemp(Ity_I64);   /* full 64-bit amount from E */
   IRTemp amt8 = newTemp(Ity_I8);    /* low 8 bits, IR shift operand */

   if (epartIsReg(rm)) {
      /* amount comes from the low 64 bits of an MMX register */
      assign( amt, getMMXReg(eregLO3ofRM(rm)) );
      DIP("%s %s,%s\n", opname,
                        nameMMXReg(eregLO3ofRM(rm)),
                        nameMMXReg(gregLO3ofRM(rm)) );
   }
   /* gap: else -- amount comes from memory */
      addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
      assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
      DIP("%s %s,%s\n", opname,
                        /* gap: dis_buf, */
                        nameMMXReg(gregLO3ofRM(rm)) );

   assign( g0,   getMMXReg(gregLO3ofRM(rm)) );
   assign( amt8, unop(Iop_64to8, mkexpr(amt)) );

   /* Classify op into direction/kind and lane size. */
   shl = shr = sar = False;
   /* gap: switch (op) { */
      /* NOTE(review): size = 32 for Iop_ShlN16x4 is suspicious --
         dis_MMX_shiftE_imm below uses size = 16 for the same op;
         confirm against upstream before changing. */
      case Iop_ShlN16x4: shl = True; size = 32; break;
      case Iop_ShlN32x2: shl = True; size = 32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      default: vassert(0);
   /* gap: } -- end of switch (op) */

   /* gap: shl/shr arm -- g1 = (amt < size) ? (g0 op amt8) : 0 */
         binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
         binop(op, mkexpr(g0), mkexpr(amt8)),

   /* gap: sar arm -- g1 = (amt < size) ? (g0 op amt8)
                                        : (g0 op (size-1)) */
         binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
         binop(op, mkexpr(g0), mkexpr(amt8)),
         binop(op, mkexpr(g0), mkU8(size-1))

   putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
   /* gap: return of the updated delta not visible in extraction */
}
7491 /* Vector by scalar shift of E by an immediate byte. This is a
7492 straight copy of dis_SSE_shiftE_imm. */
/* Shift each lane of MMX register E by an immediate byte (the second
   instruction byte).  For logical shifts (shl/shr) an amount >= lane
   size yields zero; for arithmetic shifts (sar) the amount is clamped
   to size-1.  The ModRM reg field selects the shift kind: only /2, /4
   and /6 are legal here (asserted below).
   NOTE(review): incomplete extraction.  Not visible: the 'static'
   qualifier (if any), declarations of 'amt', 'shl'/'shr'/'sar',
   'size', the amt argument of DIP, the 'switch (op)' header, and the
   zero arm of the first conditional.  Tokens reproduced as extracted,
   gaps marked; verify upstream. */
ULong dis_MMX_shiftE_imm ( Long delta, const HChar* opname, IROp op )
{
   UChar  rm = getUChar(delta);
   IRTemp e0 = newTemp(Ity_I64);   /* E value before the shift */
   IRTemp e1 = newTemp(Ity_I64);   /* shifted result */

   vassert(epartIsReg(rm));
   vassert(gregLO3ofRM(rm) == 2
           || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
   amt = getUChar(delta+1);   /* immediate shift amount */

   DIP("%s $%d,%s\n", opname,
                      /* gap: amt argument for the %d, */
                      nameMMXReg(eregLO3ofRM(rm)) );

   assign( e0, getMMXReg(eregLO3ofRM(rm)) );

   /* Classify op into direction/kind and lane size. */
   shl = shr = sar = False;
   /* gap: switch (op) { */
      case Iop_ShlN16x4: shl = True; size = 16; break;
      case Iop_ShlN32x2: shl = True; size = 32; break;
      case Iop_Shl64:    shl = True; size = 64; break;
      case Iop_SarN16x4: sar = True; size = 16; break;
      case Iop_SarN32x2: sar = True; size = 32; break;
      case Iop_ShrN16x4: shr = True; size = 16; break;
      case Iop_ShrN32x2: shr = True; size = 32; break;
      case Iop_Shr64:    shr = True; size = 64; break;
      default: vassert(0);
   /* gap: } -- end of switch (op) */

   /* shl/shr: amount resolved at decode time, so the oversize case is
      folded to a constant (zero arm lost in extraction) */
   assign( e1, amt >= size
               /* gap: ? zero value */
               : binop(op, mkexpr(e0), mkU8(amt))

   /* sar: oversize amounts clamp to size-1 */
   assign( e1, amt >= size
               ? binop(op, mkexpr(e0), mkU8(size-1))
               : binop(op, mkexpr(e0), mkU8(amt))

   putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
   /* gap: return of the updated delta not visible in extraction */
}
7547 /* Completely handle all MMX instructions except emms. */
7550 ULong
dis_MMX ( Bool
* decode_ok
,
7551 const VexAbiInfo
* vbi
, Prefix pfx
, Int sz
, Long delta
)
7556 UChar opc
= getUChar(delta
);
7559 /* dis_MMX handles all insns except emms. */
7566 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
7567 modrm
= getUChar(delta
);
7568 if (epartIsReg(modrm
)) {
7572 binop( Iop_32HLto64
,
7574 getIReg32(eregOfRexRM(pfx
,modrm
)) ) );
7575 DIP("movd %s, %s\n",
7576 nameIReg32(eregOfRexRM(pfx
,modrm
)),
7577 nameMMXReg(gregLO3ofRM(modrm
)));
7579 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7583 binop( Iop_32HLto64
,
7585 loadLE(Ity_I32
, mkexpr(addr
)) ) );
7586 DIP("movd %s, %s\n", dis_buf
, nameMMXReg(gregLO3ofRM(modrm
)));
7591 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
7592 modrm
= getUChar(delta
);
7593 if (epartIsReg(modrm
)) {
7595 putMMXReg( gregLO3ofRM(modrm
),
7596 getIReg64(eregOfRexRM(pfx
,modrm
)) );
7597 DIP("movd %s, %s\n",
7598 nameIReg64(eregOfRexRM(pfx
,modrm
)),
7599 nameMMXReg(gregLO3ofRM(modrm
)));
7601 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7603 putMMXReg( gregLO3ofRM(modrm
),
7604 loadLE(Ity_I64
, mkexpr(addr
)) );
7605 DIP("movd{64} %s, %s\n", dis_buf
, nameMMXReg(gregLO3ofRM(modrm
)));
7609 goto mmx_decode_failure
;
7615 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
7616 modrm
= getUChar(delta
);
7617 if (epartIsReg(modrm
)) {
7619 putIReg32( eregOfRexRM(pfx
,modrm
),
7620 unop(Iop_64to32
, getMMXReg(gregLO3ofRM(modrm
)) ) );
7621 DIP("movd %s, %s\n",
7622 nameMMXReg(gregLO3ofRM(modrm
)),
7623 nameIReg32(eregOfRexRM(pfx
,modrm
)));
7625 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7627 storeLE( mkexpr(addr
),
7628 unop(Iop_64to32
, getMMXReg(gregLO3ofRM(modrm
)) ) );
7629 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm
)), dis_buf
);
7634 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
7635 modrm
= getUChar(delta
);
7636 if (epartIsReg(modrm
)) {
7638 putIReg64( eregOfRexRM(pfx
,modrm
),
7639 getMMXReg(gregLO3ofRM(modrm
)) );
7640 DIP("movd %s, %s\n",
7641 nameMMXReg(gregLO3ofRM(modrm
)),
7642 nameIReg64(eregOfRexRM(pfx
,modrm
)));
7644 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7646 storeLE( mkexpr(addr
),
7647 getMMXReg(gregLO3ofRM(modrm
)) );
7648 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm
)), dis_buf
);
7651 goto mmx_decode_failure
;
7656 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
7658 && /*ignore redundant REX.W*/!(sz
==8 && haveNo66noF2noF3(pfx
)))
7659 goto mmx_decode_failure
;
7660 modrm
= getUChar(delta
);
7661 if (epartIsReg(modrm
)) {
7663 putMMXReg( gregLO3ofRM(modrm
), getMMXReg(eregLO3ofRM(modrm
)) );
7664 DIP("movq %s, %s\n",
7665 nameMMXReg(eregLO3ofRM(modrm
)),
7666 nameMMXReg(gregLO3ofRM(modrm
)));
7668 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7670 putMMXReg( gregLO3ofRM(modrm
), loadLE(Ity_I64
, mkexpr(addr
)) );
7671 DIP("movq %s, %s\n",
7672 dis_buf
, nameMMXReg(gregLO3ofRM(modrm
)));
7677 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
7679 && /*ignore redundant REX.W*/!(sz
==8 && haveNo66noF2noF3(pfx
)))
7680 goto mmx_decode_failure
;
7681 modrm
= getUChar(delta
);
7682 if (epartIsReg(modrm
)) {
7684 putMMXReg( eregLO3ofRM(modrm
), getMMXReg(gregLO3ofRM(modrm
)) );
7685 DIP("movq %s, %s\n",
7686 nameMMXReg(gregLO3ofRM(modrm
)),
7687 nameMMXReg(eregLO3ofRM(modrm
)));
7689 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
7691 storeLE( mkexpr(addr
), getMMXReg(gregLO3ofRM(modrm
)) );
7692 DIP("mov(nt)q %s, %s\n",
7693 nameMMXReg(gregLO3ofRM(modrm
)), dis_buf
);
7699 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
7701 goto mmx_decode_failure
;
7702 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "padd", True
);
7706 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
7708 && /*ignore redundant REX.W*/!(sz
==8 && haveNo66noF2noF3(pfx
)))
7709 goto mmx_decode_failure
;
7710 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "padds", True
);
7714 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7716 goto mmx_decode_failure
;
7717 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "paddus", True
);
7722 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
7724 goto mmx_decode_failure
;
7725 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "psub", True
);
7729 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
7731 goto mmx_decode_failure
;
7732 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "psubs", True
);
7736 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7738 goto mmx_decode_failure
;
7739 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "psubus", True
);
7742 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
7744 goto mmx_decode_failure
;
7745 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pmulhw", False
);
7748 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
7750 goto mmx_decode_failure
;
7751 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pmullw", False
);
7754 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
7756 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pmaddwd", False
);
7761 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
7763 goto mmx_decode_failure
;
7764 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pcmpeq", True
);
7769 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
7771 goto mmx_decode_failure
;
7772 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pcmpgt", True
);
7775 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
7777 goto mmx_decode_failure
;
7778 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "packssdw", False
);
7781 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
7783 goto mmx_decode_failure
;
7784 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "packsswb", False
);
7787 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
7789 goto mmx_decode_failure
;
7790 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "packuswb", False
);
7795 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
7797 && /*ignore redundant REX.W*/!(sz
==8 && haveNo66noF2noF3(pfx
)))
7798 goto mmx_decode_failure
;
7799 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "punpckh", True
);
7804 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
7806 && /*ignore redundant REX.W*/!(sz
==8 && haveNo66noF2noF3(pfx
)))
7807 goto mmx_decode_failure
;
7808 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "punpckl", True
);
7811 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
7813 goto mmx_decode_failure
;
7814 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pand", False
);
7817 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
7819 goto mmx_decode_failure
;
7820 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pandn", False
);
7823 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
7825 goto mmx_decode_failure
;
7826 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "por", False
);
7829 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
7831 goto mmx_decode_failure
;
7832 delta
= dis_MMXop_regmem_to_reg ( vbi
, pfx
, delta
, opc
, "pxor", False
);
7835 # define SHIFT_BY_REG(_name,_op) \
7836 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
7839 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
7840 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4
);
7841 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2
);
7842 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64
);
7844 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
7845 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4
);
7846 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2
);
7847 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64
);
7849 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
7850 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4
);
7851 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2
);
7853 # undef SHIFT_BY_REG
7858 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
7859 UChar byte2
, subopc
;
7861 goto mmx_decode_failure
;
7862 byte2
= getUChar(delta
); /* amode / sub-opcode */
7863 subopc
= toUChar( (byte2
>> 3) & 7 );
7865 # define SHIFT_BY_IMM(_name,_op) \
7866 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
7869 if (subopc
== 2 /*SRL*/ && opc
== 0x71)
7870 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4
);
7871 else if (subopc
== 2 /*SRL*/ && opc
== 0x72)
7872 SHIFT_BY_IMM("psrld", Iop_ShrN32x2
);
7873 else if (subopc
== 2 /*SRL*/ && opc
== 0x73)
7874 SHIFT_BY_IMM("psrlq", Iop_Shr64
);
7876 else if (subopc
== 4 /*SAR*/ && opc
== 0x71)
7877 SHIFT_BY_IMM("psraw", Iop_SarN16x4
);
7878 else if (subopc
== 4 /*SAR*/ && opc
== 0x72)
7879 SHIFT_BY_IMM("psrad", Iop_SarN32x2
);
7881 else if (subopc
== 6 /*SHL*/ && opc
== 0x71)
7882 SHIFT_BY_IMM("psllw", Iop_ShlN16x4
);
7883 else if (subopc
== 6 /*SHL*/ && opc
== 0x72)
7884 SHIFT_BY_IMM("pslld", Iop_ShlN32x2
);
7885 else if (subopc
== 6 /*SHL*/ && opc
== 0x73)
7886 SHIFT_BY_IMM("psllq", Iop_Shl64
);
7888 else goto mmx_decode_failure
;
7890 # undef SHIFT_BY_IMM
7895 IRTemp addr
= newTemp(Ity_I64
);
7896 IRTemp regD
= newTemp(Ity_I64
);
7897 IRTemp regM
= newTemp(Ity_I64
);
7898 IRTemp mask
= newTemp(Ity_I64
);
7899 IRTemp olddata
= newTemp(Ity_I64
);
7900 IRTemp newdata
= newTemp(Ity_I64
);
7902 modrm
= getUChar(delta
);
7903 if (sz
!= 4 || (!epartIsReg(modrm
)))
7904 goto mmx_decode_failure
;
7907 assign( addr
, handleAddrOverrides( vbi
, pfx
, getIReg64(R_RDI
) ));
7908 assign( regM
, getMMXReg( eregLO3ofRM(modrm
) ));
7909 assign( regD
, getMMXReg( gregLO3ofRM(modrm
) ));
7910 assign( mask
, binop(Iop_SarN8x8
, mkexpr(regM
), mkU8(7)) );
7911 assign( olddata
, loadLE( Ity_I64
, mkexpr(addr
) ));
7919 unop(Iop_Not64
, mkexpr(mask
)))) );
7920 storeLE( mkexpr(addr
), mkexpr(newdata
) );
7921 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm
) ),
7922 nameMMXReg( gregLO3ofRM(modrm
) ) );
7926 /* --- MMX decode failure --- */
7930 return delta
; /* ignored */
7939 /*------------------------------------------------------------*/
7940 /*--- More misc arithmetic and other obscure insns. ---*/
7941 /*------------------------------------------------------------*/
7943 /* Generate base << amt with vacated places filled with stuff
7944 from xtra. amt guaranteed in 0 .. 63. */
7946 IRExpr
* shiftL64_with_extras ( IRTemp base
, IRTemp xtra
, IRTemp amt
)
7950 else (base << amt) | (xtra >>u (64-amt))
7954 binop(Iop_CmpNE8
, mkexpr(amt
), mkU8(0)),
7956 binop(Iop_Shl64
, mkexpr(base
), mkexpr(amt
)),
7957 binop(Iop_Shr64
, mkexpr(xtra
),
7958 binop(Iop_Sub8
, mkU8(64), mkexpr(amt
)))
7964 /* Generate base >>u amt with vacated places filled with stuff
7965 from xtra. amt guaranteed in 0 .. 63. */
7967 IRExpr
* shiftR64_with_extras ( IRTemp xtra
, IRTemp base
, IRTemp amt
)
7971 else (base >>u amt) | (xtra << (64-amt))
7975 binop(Iop_CmpNE8
, mkexpr(amt
), mkU8(0)),
7977 binop(Iop_Shr64
, mkexpr(base
), mkexpr(amt
)),
7978 binop(Iop_Shl64
, mkexpr(xtra
),
7979 binop(Iop_Sub8
, mkU8(64), mkexpr(amt
)))
7985 /* Double length left and right shifts. Apparently only required in
7986 v-size (no b- variant). */
7988 ULong
dis_SHLRD_Gv_Ev ( const VexAbiInfo
* vbi
,
7990 Long delta
, UChar modrm
,
7993 Bool amt_is_literal
,
7994 const HChar
* shift_amt_txt
,
7997 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
7998 for printing it. And eip on entry points at the modrm byte. */
8002 IRType ty
= szToITy(sz
);
8003 IRTemp gsrc
= newTemp(ty
);
8004 IRTemp esrc
= newTemp(ty
);
8005 IRTemp addr
= IRTemp_INVALID
;
8006 IRTemp tmpSH
= newTemp(Ity_I8
);
8007 IRTemp tmpSS
= newTemp(Ity_I8
);
8008 IRTemp tmp64
= IRTemp_INVALID
;
8009 IRTemp res64
= IRTemp_INVALID
;
8010 IRTemp rss64
= IRTemp_INVALID
;
8011 IRTemp resTy
= IRTemp_INVALID
;
8012 IRTemp rssTy
= IRTemp_INVALID
;
8013 Int mask
= sz
==8 ? 63 : 31;
8015 vassert(sz
== 2 || sz
== 4 || sz
== 8);
8017 /* The E-part is the destination; this is shifted. The G-part
8018 supplies bits to be shifted into the E-part, but is not
8021 If shifting left, form a double-length word with E at the top
8022 and G at the bottom, and shift this left. The result is then in
8025 If shifting right, form a double-length word with G at the top
8026 and E at the bottom, and shift this right. The result is then
8029 /* Fetch the operands. */
8031 assign( gsrc
, getIRegG(sz
, pfx
, modrm
) );
8033 if (epartIsReg(modrm
)) {
8035 assign( esrc
, getIRegE(sz
, pfx
, modrm
) );
8036 DIP("sh%cd%c %s, %s, %s\n",
8037 ( left_shift
? 'l' : 'r' ), nameISize(sz
),
8039 nameIRegG(sz
, pfx
, modrm
), nameIRegE(sz
, pfx
, modrm
));
8041 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
,
8042 /* # bytes following amode */
8043 amt_is_literal
? 1 : 0 );
8045 assign( esrc
, loadLE(ty
, mkexpr(addr
)) );
8046 DIP("sh%cd%c %s, %s, %s\n",
8047 ( left_shift
? 'l' : 'r' ), nameISize(sz
),
8049 nameIRegG(sz
, pfx
, modrm
), dis_buf
);
8052 /* Calculate the masked shift amount (tmpSH), the masked subshift
8053 amount (tmpSS), the shifted value (res64) and the subshifted
8056 assign( tmpSH
, binop(Iop_And8
, shift_amt
, mkU8(mask
)) );
8057 assign( tmpSS
, binop(Iop_And8
,
8058 binop(Iop_Sub8
, mkexpr(tmpSH
), mkU8(1) ),
8061 tmp64
= newTemp(Ity_I64
);
8062 res64
= newTemp(Ity_I64
);
8063 rss64
= newTemp(Ity_I64
);
8065 if (sz
== 2 || sz
== 4) {
8067 /* G is xtra; E is data */
8068 /* what a freaking nightmare: */
8069 if (sz
== 4 && left_shift
) {
8070 assign( tmp64
, binop(Iop_32HLto64
, mkexpr(esrc
), mkexpr(gsrc
)) );
8073 binop(Iop_Shl64
, mkexpr(tmp64
), mkexpr(tmpSH
)),
8077 binop(Iop_Shl64
, mkexpr(tmp64
), mkexpr(tmpSS
)),
8081 if (sz
== 4 && !left_shift
) {
8082 assign( tmp64
, binop(Iop_32HLto64
, mkexpr(gsrc
), mkexpr(esrc
)) );
8083 assign( res64
, binop(Iop_Shr64
, mkexpr(tmp64
), mkexpr(tmpSH
)) );
8084 assign( rss64
, binop(Iop_Shr64
, mkexpr(tmp64
), mkexpr(tmpSS
)) );
8087 if (sz
== 2 && left_shift
) {
8090 binop(Iop_16HLto32
, mkexpr(esrc
), mkexpr(gsrc
)),
8091 binop(Iop_16HLto32
, mkexpr(gsrc
), mkexpr(gsrc
))
8093 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
8096 binop(Iop_Shl64
, mkexpr(tmp64
), mkexpr(tmpSH
)),
8098 /* subshift formed by shifting [esrc'0000'0000'0000] */
8102 binop(Iop_Shl64
, unop(Iop_16Uto64
, mkexpr(esrc
)),
8108 if (sz
== 2 && !left_shift
) {
8111 binop(Iop_16HLto32
, mkexpr(gsrc
), mkexpr(gsrc
)),
8112 binop(Iop_16HLto32
, mkexpr(gsrc
), mkexpr(esrc
))
8114 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
8115 assign( res64
, binop(Iop_Shr64
, mkexpr(tmp64
), mkexpr(tmpSH
)) );
8116 /* subshift formed by shifting [0000'0000'0000'esrc] */
8117 assign( rss64
, binop(Iop_Shr64
,
8118 unop(Iop_16Uto64
, mkexpr(esrc
)),
8126 assign( res64
, shiftL64_with_extras( esrc
, gsrc
, tmpSH
));
8127 assign( rss64
, shiftL64_with_extras( esrc
, gsrc
, tmpSS
));
8129 assign( res64
, shiftR64_with_extras( gsrc
, esrc
, tmpSH
));
8130 assign( rss64
, shiftR64_with_extras( gsrc
, esrc
, tmpSS
));
8135 resTy
= newTemp(ty
);
8136 rssTy
= newTemp(ty
);
8137 assign( resTy
, narrowTo(ty
, mkexpr(res64
)) );
8138 assign( rssTy
, narrowTo(ty
, mkexpr(rss64
)) );
8140 /* Put result back and write the flags thunk. */
8141 setFlags_DEP1_DEP2_shift ( left_shift
? Iop_Shl64
: Iop_Sar64
,
8142 resTy
, rssTy
, ty
, tmpSH
);
8144 if (epartIsReg(modrm
)) {
8145 putIRegE(sz
, pfx
, modrm
, mkexpr(resTy
));
8147 storeLE( mkexpr(addr
), mkexpr(resTy
) );
8150 if (amt_is_literal
) delta
++;
8155 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
/* The four variants of the BTx family: plain test (BT), test-and-set
   (BTS), test-and-reset (BTR) and test-and-complement (BTC). */
typedef
   enum {
      BtOpNone,   /* BT  -- test only, no modification */
      BtOpSet,    /* BTS -- set the selected bit */
      BtOpReset,  /* BTR -- clear the selected bit */
      BtOpComp    /* BTC -- flip the selected bit */
   }
   BtOp;
8160 static const HChar
* nameBtOp ( BtOp op
)
8163 case BtOpNone
: return "";
8164 case BtOpSet
: return "s";
8165 case BtOpReset
: return "r";
8166 case BtOpComp
: return "c";
8167 default: vpanic("nameBtOp(amd64)");
8173 ULong
dis_bt_G_E ( const VexAbiInfo
* vbi
,
8174 Prefix pfx
, Int sz
, Long delta
, BtOp op
,
8175 /*OUT*/Bool
* decode_OK
)
8180 IRTemp t_fetched
, t_bitno0
, t_bitno1
, t_bitno2
, t_addr0
,
8181 t_addr1
, t_rsp
, t_mask
, t_new
;
8183 vassert(sz
== 2 || sz
== 4 || sz
== 8);
8185 t_fetched
= t_bitno0
= t_bitno1
= t_bitno2
8186 = t_addr0
= t_addr1
= t_rsp
8187 = t_mask
= t_new
= IRTemp_INVALID
;
8189 t_fetched
= newTemp(Ity_I8
);
8190 t_new
= newTemp(Ity_I8
);
8191 t_bitno0
= newTemp(Ity_I64
);
8192 t_bitno1
= newTemp(Ity_I64
);
8193 t_bitno2
= newTemp(Ity_I8
);
8194 t_addr1
= newTemp(Ity_I64
);
8195 modrm
= getUChar(delta
);
8198 if (epartIsReg(modrm
)) {
8199 /* F2 and F3 are never acceptable. */
8200 if (haveF2orF3(pfx
)) {
8205 /* F2 or F3 (but not both) are allowed, provided LOCK is also
8206 present, and only for the BTC/BTS/BTR cases (not BT). */
8207 if (haveF2orF3(pfx
)) {
8208 if (haveF2andF3(pfx
) || !haveLOCK(pfx
) || op
== BtOpNone
) {
8215 assign( t_bitno0
, widenSto64(getIRegG(sz
, pfx
, modrm
)) );
8217 if (epartIsReg(modrm
)) {
8219 /* Get it onto the client's stack. Oh, this is a horrible
8220 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925.
8221 Because of the ELF ABI stack redzone, there may be live data
8222 up to 128 bytes below %RSP. So we can't just push it on the
8223 stack, else we may wind up trashing live data, and causing
8224 impossible-to-find simulation errors. (Yes, this did
8225 happen.) So we need to drop RSP before at least 128 before
8226 pushing it. That unfortunately means hitting Memcheck's
8227 fast-case painting code. Ideally we should drop more than
8228 128, to reduce the chances of breaking buggy programs that
8229 have live data below -128(%RSP). Memcheck fast-cases moves
8230 of 288 bytes due to the need to handle ppc64-linux quickly,
8231 so let's use 288. Of course the real fix is to get rid of
8232 this kludge entirely. */
8233 t_rsp
= newTemp(Ity_I64
);
8234 t_addr0
= newTemp(Ity_I64
);
8236 vassert(vbi
->guest_stack_redzone_size
== 128);
8237 assign( t_rsp
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(288)) );
8238 putIReg64(R_RSP
, mkexpr(t_rsp
));
8240 storeLE( mkexpr(t_rsp
), getIRegE(sz
, pfx
, modrm
) );
8242 /* Make t_addr0 point at it. */
8243 assign( t_addr0
, mkexpr(t_rsp
) );
8245 /* Mask out upper bits of the shift amount, since we're doing a
8247 assign( t_bitno1
, binop(Iop_And64
,
8249 mkU64(sz
== 8 ? 63 : sz
== 4 ? 31 : 15)) );
8252 t_addr0
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
8254 assign( t_bitno1
, mkexpr(t_bitno0
) );
8257 /* At this point: t_addr0 is the address being operated on. If it
8258 was a reg, we will have pushed it onto the client's stack.
8259 t_bitno1 is the bit number, suitably masked in the case of a
8262 /* Now the main sequence. */
8266 binop(Iop_Sar64
, mkexpr(t_bitno1
), mkU8(3))) );
8268 /* t_addr1 now holds effective address */
8272 binop(Iop_And64
, mkexpr(t_bitno1
), mkU64(7))) );
8274 /* t_bitno2 contains offset of bit within byte */
8276 if (op
!= BtOpNone
) {
8277 t_mask
= newTemp(Ity_I8
);
8278 assign( t_mask
, binop(Iop_Shl8
, mkU8(1), mkexpr(t_bitno2
)) );
8281 /* t_mask is now a suitable byte mask */
8283 assign( t_fetched
, loadLE(Ity_I8
, mkexpr(t_addr1
)) );
8285 if (op
!= BtOpNone
) {
8289 binop(Iop_Or8
, mkexpr(t_fetched
), mkexpr(t_mask
)) );
8293 binop(Iop_Xor8
, mkexpr(t_fetched
), mkexpr(t_mask
)) );
8297 binop(Iop_And8
, mkexpr(t_fetched
),
8298 unop(Iop_Not8
, mkexpr(t_mask
))) );
8301 vpanic("dis_bt_G_E(amd64)");
8303 if ((haveLOCK(pfx
)) && !epartIsReg(modrm
)) {
8304 casLE( mkexpr(t_addr1
), mkexpr(t_fetched
)/*expd*/,
8305 mkexpr(t_new
)/*new*/,
8306 guest_RIP_curr_instr
);
8308 storeLE( mkexpr(t_addr1
), mkexpr(t_new
) );
8312 /* Side effect done; now get selected bit into Carry flag. The Intel docs
8313 (as of 2015, at least) say that C holds the result, Z is unchanged, and
8314 O,S,A and P are undefined. However, on Skylake it appears that O,S,A,P
8315 are also unchanged, so let's do that. */
8316 const ULong maskC
= AMD64G_CC_MASK_C
;
8317 const ULong maskOSZAP
= AMD64G_CC_MASK_O
| AMD64G_CC_MASK_S
8318 | AMD64G_CC_MASK_Z
| AMD64G_CC_MASK_A
8321 IRTemp old_rflags
= newTemp(Ity_I64
);
8322 assign(old_rflags
, mk_amd64g_calculate_rflags_all());
8324 IRTemp new_rflags
= newTemp(Ity_I64
);
8327 binop(Iop_And64
, mkexpr(old_rflags
), mkU64(maskOSZAP
)),
8330 unop(Iop_8Uto64
, mkexpr(t_fetched
)),
8334 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
8335 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
8336 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(new_rflags
) ));
8337 /* Set NDEP even though it isn't used. This makes redundant-PUT
8338 elimination of previous stores to this field work better. */
8339 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
8341 /* Move reg operand from stack back to reg */
8342 if (epartIsReg(modrm
)) {
8343 /* t_rsp still points at it. */
8344 /* only write the reg if actually modifying it; doing otherwise
8345 zeroes the top half erroneously when doing btl due to
8346 standard zero-extend rule */
8348 putIRegE(sz
, pfx
, modrm
, loadLE(szToITy(sz
), mkexpr(t_rsp
)) );
8349 putIReg64(R_RSP
, binop(Iop_Add64
, mkexpr(t_rsp
), mkU64(288)) );
8352 DIP("bt%s%c %s, %s\n",
8353 nameBtOp(op
), nameISize(sz
), nameIRegG(sz
, pfx
, modrm
),
8354 ( epartIsReg(modrm
) ? nameIRegE(sz
, pfx
, modrm
) : dis_buf
) );
8361 /* Handle BSF/BSR. Only v-size seems necessary. */
8363 ULong
dis_bs_E_G ( const VexAbiInfo
* vbi
,
8364 Prefix pfx
, Int sz
, Long delta
, Bool fwds
)
8370 IRType ty
= szToITy(sz
);
8371 IRTemp src
= newTemp(ty
);
8372 IRTemp dst
= newTemp(ty
);
8373 IRTemp src64
= newTemp(Ity_I64
);
8374 IRTemp dst64
= newTemp(Ity_I64
);
8375 IRTemp srcB
= newTemp(Ity_I1
);
8377 vassert(sz
== 8 || sz
== 4 || sz
== 2);
8379 modrm
= getUChar(delta
);
8380 isReg
= epartIsReg(modrm
);
8383 assign( src
, getIRegE(sz
, pfx
, modrm
) );
8386 IRTemp addr
= disAMode( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
8388 assign( src
, loadLE(ty
, mkexpr(addr
)) );
8391 DIP("bs%c%c %s, %s\n",
8392 fwds
? 'f' : 'r', nameISize(sz
),
8393 ( isReg
? nameIRegE(sz
, pfx
, modrm
) : dis_buf
),
8394 nameIRegG(sz
, pfx
, modrm
));
8396 /* First, widen src to 64 bits if it is not already. */
8397 assign( src64
, widenUto64(mkexpr(src
)) );
8399 /* Generate a bool expression which is zero iff the original is
8400 zero, and nonzero otherwise. Ask for a CmpNE version which, if
8401 instrumented by Memcheck, is instrumented expensively, since
8402 this may be used on the output of a preceding movmskb insn,
8403 which has been known to be partially defined, and in need of
8404 careful handling. */
8405 assign( srcB
, binop(Iop_ExpCmpNE64
, mkexpr(src64
), mkU64(0)) );
8407 /* Flags: Z is 1 iff source value is zero. All others
8408 are undefined -- we force them to zero. */
8409 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
8410 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
8413 IRExpr_ITE( mkexpr(srcB
),
8417 mkU64(AMD64G_CC_MASK_Z
)
8420 /* Set NDEP even though it isn't used. This makes redundant-PUT
8421 elimination of previous stores to this field work better. */
8422 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
8424 /* Result: iff source value is zero, we can't use
8425 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
8426 But anyway, amd64 semantics say the result is undefined in
8427 such situations. Hence handle the zero case specially. */
8429 /* Bleh. What we compute:
8431 bsf64: if src == 0 then {dst is unchanged}
8434 bsr64: if src == 0 then {dst is unchanged}
8435 else 63 - Clz64(src)
8437 bsf32: if src == 0 then {dst is unchanged}
8438 else Ctz64(32Uto64(src))
8440 bsr32: if src == 0 then {dst is unchanged}
8441 else 63 - Clz64(32Uto64(src))
8443 bsf16: if src == 0 then {dst is unchanged}
8444 else Ctz64(32Uto64(16Uto32(src)))
8446 bsr16: if src == 0 then {dst is unchanged}
8447 else 63 - Clz64(32Uto64(16Uto32(src)))
8450 /* The main computation, guarding against zero. */
8455 fwds
? unop(Iop_Ctz64
, mkexpr(src64
))
8458 unop(Iop_Clz64
, mkexpr(src64
))),
8459 /* src == 0 -- leave dst unchanged */
8460 widenUto64( getIRegG( sz
, pfx
, modrm
) )
8465 assign( dst
, unop(Iop_64to16
, mkexpr(dst64
)) );
8468 assign( dst
, unop(Iop_64to32
, mkexpr(dst64
)) );
8470 assign( dst
, mkexpr(dst64
) );
8472 /* dump result back */
8473 putIRegG( sz
, pfx
, modrm
, mkexpr(dst
) );
8479 /* swap rAX with the reg specified by reg and REX.B */
8481 void codegen_xchg_rAX_Reg ( Prefix pfx
, Int sz
, UInt regLo3
)
8483 IRType ty
= szToITy(sz
);
8484 IRTemp t1
= newTemp(ty
);
8485 IRTemp t2
= newTemp(ty
);
8486 vassert(sz
== 2 || sz
== 4 || sz
== 8);
8487 vassert(regLo3
< 8);
8489 assign( t1
, getIReg64(R_RAX
) );
8490 assign( t2
, getIRegRexB(8, pfx
, regLo3
) );
8491 putIReg64( R_RAX
, mkexpr(t2
) );
8492 putIRegRexB(8, pfx
, regLo3
, mkexpr(t1
) );
8493 } else if (sz
== 4) {
8494 assign( t1
, getIReg32(R_RAX
) );
8495 assign( t2
, getIRegRexB(4, pfx
, regLo3
) );
8496 putIReg32( R_RAX
, mkexpr(t2
) );
8497 putIRegRexB(4, pfx
, regLo3
, mkexpr(t1
) );
8499 assign( t1
, getIReg16(R_RAX
) );
8500 assign( t2
, getIRegRexB(2, pfx
, regLo3
) );
8501 putIReg16( R_RAX
, mkexpr(t2
) );
8502 putIRegRexB(2, pfx
, regLo3
, mkexpr(t1
) );
8504 DIP("xchg%c %s, %s\n",
8505 nameISize(sz
), nameIRegRAX(sz
),
8506 nameIRegRexB(sz
,pfx
, regLo3
));
8511 void codegen_SAHF ( void )
8513 /* Set the flags to:
8514 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O)
8515 -- retain the old O flag
8516 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8517 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C)
8519 ULong mask_SZACP
= AMD64G_CC_MASK_S
|AMD64G_CC_MASK_Z
|AMD64G_CC_MASK_A
8520 |AMD64G_CC_MASK_C
|AMD64G_CC_MASK_P
;
8521 IRTemp oldflags
= newTemp(Ity_I64
);
8522 assign( oldflags
, mk_amd64g_calculate_rflags_all() );
8523 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
8524 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
8525 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
8526 stmt( IRStmt_Put( OFFB_CC_DEP1
,
8528 binop(Iop_And64
, mkexpr(oldflags
), mkU64(AMD64G_CC_MASK_O
)),
8530 binop(Iop_Shr64
, getIReg64(R_RAX
), mkU8(8)),
8538 void codegen_LAHF ( void )
8540 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
8541 IRExpr
* rax_with_hole
;
8544 ULong mask_SZACP
= AMD64G_CC_MASK_S
|AMD64G_CC_MASK_Z
|AMD64G_CC_MASK_A
8545 |AMD64G_CC_MASK_C
|AMD64G_CC_MASK_P
;
8547 IRTemp flags
= newTemp(Ity_I64
);
8548 assign( flags
, mk_amd64g_calculate_rflags_all() );
8551 = binop(Iop_And64
, getIReg64(R_RAX
), mkU64(~0xFF00ULL
));
8553 = binop(Iop_Or64
, binop(Iop_And64
, mkexpr(flags
), mkU64(mask_SZACP
)),
8556 = binop(Iop_Or64
, rax_with_hole
,
8557 binop(Iop_Shl64
, new_byte
, mkU8(8)));
8558 putIReg64(R_RAX
, new_rax
);
8563 ULong
dis_cmpxchg_G_E ( /*OUT*/Bool
* ok
,
8564 const VexAbiInfo
* vbi
,
8572 IRType ty
= szToITy(size
);
8573 IRTemp acc
= newTemp(ty
);
8574 IRTemp src
= newTemp(ty
);
8575 IRTemp dest
= newTemp(ty
);
8576 IRTemp dest2
= newTemp(ty
);
8577 IRTemp acc2
= newTemp(ty
);
8578 IRTemp cond
= newTemp(Ity_I1
);
8579 IRTemp addr
= IRTemp_INVALID
;
8580 UChar rm
= getUChar(delta0
);
8582 /* There are 3 cases to consider:
8584 reg-reg: ignore any lock prefix, generate sequence based
8587 reg-mem, not locked: ignore any lock prefix, generate sequence
8590 reg-mem, locked: use IRCAS
8593 /* Decide whether F2 or F3 are acceptable. Never for register
8594 case, but for the memory case, one or the other is OK provided
8595 LOCK is also present. */
8596 if (epartIsReg(rm
)) {
8597 if (haveF2orF3(pfx
)) {
8602 if (haveF2orF3(pfx
)) {
8603 if (haveF2andF3(pfx
) || !haveLOCK(pfx
)) {
8610 if (epartIsReg(rm
)) {
8612 assign( dest
, getIRegE(size
, pfx
, rm
) );
8614 assign( src
, getIRegG(size
, pfx
, rm
) );
8615 assign( acc
, getIRegRAX(size
) );
8616 setFlags_DEP1_DEP2(Iop_Sub8
, acc
, dest
, ty
);
8617 assign( cond
, mk_amd64g_calculate_condition(AMD64CondZ
) );
8618 assign( dest2
, IRExpr_ITE(mkexpr(cond
), mkexpr(src
), mkexpr(dest
)) );
8619 assign( acc2
, IRExpr_ITE(mkexpr(cond
), mkexpr(acc
), mkexpr(dest
)) );
8620 putIRegRAX(size
, mkexpr(acc2
));
8621 putIRegE(size
, pfx
, rm
, mkexpr(dest2
));
8622 DIP("cmpxchg%c %s,%s\n", nameISize(size
),
8623 nameIRegG(size
,pfx
,rm
),
8624 nameIRegE(size
,pfx
,rm
) );
8626 else if (!epartIsReg(rm
) && !haveLOCK(pfx
)) {
8628 addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
8629 assign( dest
, loadLE(ty
, mkexpr(addr
)) );
8631 assign( src
, getIRegG(size
, pfx
, rm
) );
8632 assign( acc
, getIRegRAX(size
) );
8633 setFlags_DEP1_DEP2(Iop_Sub8
, acc
, dest
, ty
);
8634 assign( cond
, mk_amd64g_calculate_condition(AMD64CondZ
) );
8635 assign( dest2
, IRExpr_ITE(mkexpr(cond
), mkexpr(src
), mkexpr(dest
)) );
8636 assign( acc2
, IRExpr_ITE(mkexpr(cond
), mkexpr(acc
), mkexpr(dest
)) );
8637 putIRegRAX(size
, mkexpr(acc2
));
8638 storeLE( mkexpr(addr
), mkexpr(dest2
) );
8639 DIP("cmpxchg%c %s,%s\n", nameISize(size
),
8640 nameIRegG(size
,pfx
,rm
), dis_buf
);
8642 else if (!epartIsReg(rm
) && haveLOCK(pfx
)) {
8644 /* src is new value. acc is expected value. dest is old value.
8645 Compute success from the output of the IRCAS, and steer the
8646 new value for RAX accordingly: in case of success, RAX is
8648 addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
8650 assign( src
, getIRegG(size
, pfx
, rm
) );
8651 assign( acc
, getIRegRAX(size
) );
8653 mkIRCAS( IRTemp_INVALID
, dest
, Iend_LE
, mkexpr(addr
),
8654 NULL
, mkexpr(acc
), NULL
, mkexpr(src
) )
8656 setFlags_DEP1_DEP2(Iop_Sub8
, acc
, dest
, ty
);
8657 assign( cond
, mk_amd64g_calculate_condition(AMD64CondZ
) );
8658 assign( acc2
, IRExpr_ITE(mkexpr(cond
), mkexpr(acc
), mkexpr(dest
)) );
8659 putIRegRAX(size
, mkexpr(acc2
));
8660 DIP("cmpxchg%c %s,%s\n", nameISize(size
),
8661 nameIRegG(size
,pfx
,rm
), dis_buf
);
8670 /* Handle conditional move instructions of the form
8671 cmovcc E(reg-or-mem), G(reg)
8673 E(src) is reg-or-mem
8676 If E is reg, --> GET %E, tmps
8681 If E is mem --> (getAddr E) -> tmpa
8688 ULong
dis_cmov_E_G ( const VexAbiInfo
* vbi
,
8694 UChar rm
= getUChar(delta0
);
8698 IRType ty
= szToITy(sz
);
8699 IRTemp tmps
= newTemp(ty
);
8700 IRTemp tmpd
= newTemp(ty
);
8702 if (epartIsReg(rm
)) {
8703 assign( tmps
, getIRegE(sz
, pfx
, rm
) );
8704 assign( tmpd
, getIRegG(sz
, pfx
, rm
) );
8706 putIRegG( sz
, pfx
, rm
,
8707 IRExpr_ITE( mk_amd64g_calculate_condition(cond
),
8711 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond
),
8712 nameIRegE(sz
,pfx
,rm
),
8713 nameIRegG(sz
,pfx
,rm
));
8717 /* E refers to memory */
8719 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
8720 assign( tmps
, loadLE(ty
, mkexpr(addr
)) );
8721 assign( tmpd
, getIRegG(sz
, pfx
, rm
) );
8723 putIRegG( sz
, pfx
, rm
,
8724 IRExpr_ITE( mk_amd64g_calculate_condition(cond
),
8729 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond
),
8731 nameIRegG(sz
,pfx
,rm
));
8738 ULong
dis_xadd_G_E ( /*OUT*/Bool
* decode_ok
,
8739 const VexAbiInfo
* vbi
,
8740 Prefix pfx
, Int sz
, Long delta0
)
8743 UChar rm
= getUChar(delta0
);
8746 IRType ty
= szToITy(sz
);
8747 IRTemp tmpd
= newTemp(ty
);
8748 IRTemp tmpt0
= newTemp(ty
);
8749 IRTemp tmpt1
= newTemp(ty
);
8751 /* There are 3 cases to consider:
8753 reg-reg: ignore any lock prefix,
8754 generate 'naive' (non-atomic) sequence
8756 reg-mem, not locked: ignore any lock prefix, generate 'naive'
8757 (non-atomic) sequence
8759 reg-mem, locked: use IRCAS
8762 if (epartIsReg(rm
)) {
8764 assign( tmpd
, getIRegE(sz
, pfx
, rm
) );
8765 assign( tmpt0
, getIRegG(sz
, pfx
, rm
) );
8766 assign( tmpt1
, binop(mkSizedOp(ty
,Iop_Add8
),
8767 mkexpr(tmpd
), mkexpr(tmpt0
)) );
8768 setFlags_DEP1_DEP2( Iop_Add8
, tmpd
, tmpt0
, ty
);
8769 putIRegG(sz
, pfx
, rm
, mkexpr(tmpd
));
8770 putIRegE(sz
, pfx
, rm
, mkexpr(tmpt1
));
8771 DIP("xadd%c %s, %s\n",
8772 nameISize(sz
), nameIRegG(sz
,pfx
,rm
), nameIRegE(sz
,pfx
,rm
));
8776 else if (!epartIsReg(rm
) && !haveLOCK(pfx
)) {
8778 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
8779 assign( tmpd
, loadLE(ty
, mkexpr(addr
)) );
8780 assign( tmpt0
, getIRegG(sz
, pfx
, rm
) );
8781 assign( tmpt1
, binop(mkSizedOp(ty
,Iop_Add8
),
8782 mkexpr(tmpd
), mkexpr(tmpt0
)) );
8783 setFlags_DEP1_DEP2( Iop_Add8
, tmpd
, tmpt0
, ty
);
8784 storeLE( mkexpr(addr
), mkexpr(tmpt1
) );
8785 putIRegG(sz
, pfx
, rm
, mkexpr(tmpd
));
8786 DIP("xadd%c %s, %s\n",
8787 nameISize(sz
), nameIRegG(sz
,pfx
,rm
), dis_buf
);
8791 else if (!epartIsReg(rm
) && haveLOCK(pfx
)) {
8793 IRTemp addr
= disAMode ( &len
, vbi
, pfx
, delta0
, dis_buf
, 0 );
8794 assign( tmpd
, loadLE(ty
, mkexpr(addr
)) );
8795 assign( tmpt0
, getIRegG(sz
, pfx
, rm
) );
8796 assign( tmpt1
, binop(mkSizedOp(ty
,Iop_Add8
),
8797 mkexpr(tmpd
), mkexpr(tmpt0
)) );
8798 casLE( mkexpr(addr
), mkexpr(tmpd
)/*expVal*/,
8799 mkexpr(tmpt1
)/*newVal*/, guest_RIP_curr_instr
);
8800 setFlags_DEP1_DEP2( Iop_Add8
, tmpd
, tmpt0
, ty
);
8801 putIRegG(sz
, pfx
, rm
, mkexpr(tmpd
));
8802 DIP("xadd%c %s, %s\n",
8803 nameISize(sz
), nameIRegG(sz
,pfx
,rm
), dis_buf
);
8811 //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
8814 //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
8818 //.. UChar rm = getUChar(delta0);
8819 //.. HChar dis_buf[50];
8821 //.. if (epartIsReg(rm)) {
8822 //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
8823 //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
8824 //.. return 1+delta0;
8826 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8827 //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
8828 //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
8829 //.. return len+delta0;
8833 //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
8834 //.. dst is ireg and sz==4, zero out top half of it. */
8837 //.. UInt dis_mov_Sw_Ew ( UChar sorb,
8843 //.. UChar rm = getUChar(delta0);
8844 //.. HChar dis_buf[50];
8846 //.. vassert(sz == 2 || sz == 4);
8848 //.. if (epartIsReg(rm)) {
8850 //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
8852 //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
8854 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
8855 //.. return 1+delta0;
8857 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8858 //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
8859 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
8860 //.. return len+delta0;
8864 /* Handle move instructions of the form
8866 mov sreg, reg-or-mem
8867 Is passed a ptr to the modRM byte, and the data size. Returns
8868 the address advanced completely over this instruction.
8870 VEX does not currently simulate segment registers on AMD64 which means that
8871 instead of moving a value of a segment register, zero is moved to the
8872 destination. The zero value represents a null (unused) selector. This is
8873 not correct (especially for the %cs, %fs and %gs registers) but it seems to
8874 provide a sufficient simulation for currently seen programs that use this
8875 instruction. If some program actually decides to use the obtained segment
8876 selector for something meaningful then the zero value should be a clear
8877 indicator that there is some problem.
8880 E(dst) is reg-or-mem
8882 If E is reg, --> PUT $0, %E
8884 If E is mem, --> (getAddr E) -> tmpa
8888 ULong
dis_mov_S_E ( const VexAbiInfo
* vbi
,
8894 UChar rm
= getUChar(delta0
);
8897 if (epartIsReg(rm
)) {
8898 putIRegE(size
, pfx
, rm
, mkU(szToITy(size
), 0));
8899 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx
, rm
)),
8900 nameIRegE(size
, pfx
, rm
));
8904 /* E refers to memory */
8906 IRTemp addr
= disAMode(&len
, vbi
, pfx
, delta0
, dis_buf
, 0);
8907 storeLE(mkexpr(addr
), mkU16(0));
8908 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx
, rm
)),
8915 //.. void dis_push_segreg ( UInt sreg, Int sz )
8917 //.. IRTemp t1 = newTemp(Ity_I16);
8918 //.. IRTemp ta = newTemp(Ity_I32);
8919 //.. vassert(sz == 2 || sz == 4);
8921 //.. assign( t1, getSReg(sreg) );
8922 //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
8923 //.. putIReg(4, R_ESP, mkexpr(ta));
8924 //.. storeLE( mkexpr(ta), mkexpr(t1) );
8926 //.. DIP("pushw %s\n", nameSReg(sreg));
8930 //.. void dis_pop_segreg ( UInt sreg, Int sz )
8932 //.. IRTemp t1 = newTemp(Ity_I16);
8933 //.. IRTemp ta = newTemp(Ity_I32);
8934 //.. vassert(sz == 2 || sz == 4);
8936 //.. assign( ta, getIReg(4, R_ESP) );
8937 //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
8939 //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
8940 //.. putSReg( sreg, mkexpr(t1) );
8941 //.. DIP("pop %s\n", nameSReg(sreg));
8945 void dis_ret ( /*MOD*/DisResult
* dres
, const VexAbiInfo
* vbi
, ULong d64
)
8947 IRTemp t1
= newTemp(Ity_I64
);
8948 IRTemp t2
= newTemp(Ity_I64
);
8949 IRTemp t3
= newTemp(Ity_I64
);
8950 assign(t1
, getIReg64(R_RSP
));
8951 assign(t2
, loadLE(Ity_I64
,mkexpr(t1
)));
8952 assign(t3
, binop(Iop_Add64
, mkexpr(t1
), mkU64(8+d64
)));
8953 putIReg64(R_RSP
, mkexpr(t3
));
8954 make_redzone_AbiHint(vbi
, t3
, t2
/*nia*/, "ret");
8955 jmp_treg(dres
, Ijk_Ret
, t2
);
8956 vassert(dres
->whatNext
== Dis_StopHere
);
8960 /*------------------------------------------------------------*/
8961 /*--- SSE/SSE2/SSE3 helpers ---*/
8962 /*------------------------------------------------------------*/
8964 /* Indicates whether the op requires a rounding-mode argument. Note
8965 that this covers only vector floating point arithmetic ops, and
8966 omits the scalar ones that need rounding modes. Note also that
8967 inconsistencies here will get picked up later by the IR sanity
8968 checker, so this isn't correctness-critical. */
8969 static Bool
requiresRMode ( IROp op
)
8973 case Iop_Add32Fx4
: case Iop_Sub32Fx4
:
8974 case Iop_Mul32Fx4
: case Iop_Div32Fx4
:
8975 case Iop_Add64Fx2
: case Iop_Sub64Fx2
:
8976 case Iop_Mul64Fx2
: case Iop_Div64Fx2
:
8978 case Iop_Add32Fx8
: case Iop_Sub32Fx8
:
8979 case Iop_Mul32Fx8
: case Iop_Div32Fx8
:
8980 case Iop_Add64Fx4
: case Iop_Sub64Fx4
:
8981 case Iop_Mul64Fx4
: case Iop_Div64Fx4
:
8990 /* Worker function; do not call directly.
8991 Handles full width G = G `op` E and G = (not G) `op` E.
8994 static ULong
dis_SSE_E_to_G_all_wrk (
8995 const VexAbiInfo
* vbi
,
8996 Prefix pfx
, Long delta
,
8997 const HChar
* opname
, IROp op
,
9004 UChar rm
= getUChar(delta
);
9005 Bool needsRMode
= requiresRMode(op
);
9007 = invertG
? unop(Iop_NotV128
, getXMMReg(gregOfRexRM(pfx
,rm
)))
9008 : getXMMReg(gregOfRexRM(pfx
,rm
));
9009 if (epartIsReg(rm
)) {
9011 gregOfRexRM(pfx
,rm
),
9013 ? triop(op
, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
9015 getXMMReg(eregOfRexRM(pfx
,rm
)))
9017 getXMMReg(eregOfRexRM(pfx
,rm
)))
9019 DIP("%s %s,%s\n", opname
,
9020 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9021 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9024 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
9026 gregOfRexRM(pfx
,rm
),
9028 ? triop(op
, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
9030 loadLE(Ity_V128
, mkexpr(addr
)))
9032 loadLE(Ity_V128
, mkexpr(addr
)))
9034 DIP("%s %s,%s\n", opname
,
9036 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9042 /* All lanes SSE binary operation, G = G `op` E. */
9045 ULong
dis_SSE_E_to_G_all ( const VexAbiInfo
* vbi
,
9046 Prefix pfx
, Long delta
,
9047 const HChar
* opname
, IROp op
)
9049 return dis_SSE_E_to_G_all_wrk( vbi
, pfx
, delta
, opname
, op
, False
);
9052 /* All lanes SSE binary operation, G = (not G) `op` E. */
9055 ULong
dis_SSE_E_to_G_all_invG ( const VexAbiInfo
* vbi
,
9056 Prefix pfx
, Long delta
,
9057 const HChar
* opname
, IROp op
)
9059 return dis_SSE_E_to_G_all_wrk( vbi
, pfx
, delta
, opname
, op
, True
);
9063 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
9065 static ULong
dis_SSE_E_to_G_lo32 ( const VexAbiInfo
* vbi
,
9066 Prefix pfx
, Long delta
,
9067 const HChar
* opname
, IROp op
)
9072 UChar rm
= getUChar(delta
);
9073 IRExpr
* gpart
= getXMMReg(gregOfRexRM(pfx
,rm
));
9074 if (epartIsReg(rm
)) {
9075 putXMMReg( gregOfRexRM(pfx
,rm
),
9077 getXMMReg(eregOfRexRM(pfx
,rm
))) );
9078 DIP("%s %s,%s\n", opname
,
9079 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9080 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9083 /* We can only do a 32-bit memory read, so the upper 3/4 of the
9084 E operand needs to be made simply of zeroes. */
9085 IRTemp epart
= newTemp(Ity_V128
);
9086 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
9087 assign( epart
, unop( Iop_32UtoV128
,
9088 loadLE(Ity_I32
, mkexpr(addr
))) );
9089 putXMMReg( gregOfRexRM(pfx
,rm
),
9090 binop(op
, gpart
, mkexpr(epart
)) );
9091 DIP("%s %s,%s\n", opname
,
9093 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9099 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
9101 static ULong
dis_SSE_E_to_G_lo64 ( const VexAbiInfo
* vbi
,
9102 Prefix pfx
, Long delta
,
9103 const HChar
* opname
, IROp op
)
9108 UChar rm
= getUChar(delta
);
9109 IRExpr
* gpart
= getXMMReg(gregOfRexRM(pfx
,rm
));
9110 if (epartIsReg(rm
)) {
9111 putXMMReg( gregOfRexRM(pfx
,rm
),
9113 getXMMReg(eregOfRexRM(pfx
,rm
))) );
9114 DIP("%s %s,%s\n", opname
,
9115 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9116 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9119 /* We can only do a 64-bit memory read, so the upper half of the
9120 E operand needs to be made simply of zeroes. */
9121 IRTemp epart
= newTemp(Ity_V128
);
9122 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
9123 assign( epart
, unop( Iop_64UtoV128
,
9124 loadLE(Ity_I64
, mkexpr(addr
))) );
9125 putXMMReg( gregOfRexRM(pfx
,rm
),
9126 binop(op
, gpart
, mkexpr(epart
)) );
9127 DIP("%s %s,%s\n", opname
,
9129 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9135 /* All lanes unary SSE operation, G = op(E). */
9137 static ULong
dis_SSE_E_to_G_unary_all (
9138 const VexAbiInfo
* vbi
,
9139 Prefix pfx
, Long delta
,
9140 const HChar
* opname
, IROp op
9146 UChar rm
= getUChar(delta
);
9147 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
9148 // up in the usual way.
9149 Bool needsIRRM
= op
== Iop_Sqrt32Fx4
|| op
== Iop_Sqrt64Fx2
;
9150 if (epartIsReg(rm
)) {
9151 IRExpr
* src
= getXMMReg(eregOfRexRM(pfx
,rm
));
9152 /* XXXROUNDINGFIXME */
9153 IRExpr
* res
= needsIRRM
? binop(op
, get_FAKE_roundingmode(), src
)
9155 putXMMReg( gregOfRexRM(pfx
,rm
), res
);
9156 DIP("%s %s,%s\n", opname
,
9157 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9158 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9161 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
9162 IRExpr
* src
= loadLE(Ity_V128
, mkexpr(addr
));
9163 /* XXXROUNDINGFIXME */
9164 IRExpr
* res
= needsIRRM
? binop(op
, get_FAKE_roundingmode(), src
)
9166 putXMMReg( gregOfRexRM(pfx
,rm
), res
);
9167 DIP("%s %s,%s\n", opname
,
9169 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9175 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */
9177 static ULong
dis_SSE_E_to_G_unary_lo32 (
9178 const VexAbiInfo
* vbi
,
9179 Prefix pfx
, Long delta
,
9180 const HChar
* opname
, IROp op
9183 /* First we need to get the old G value and patch the low 32 bits
9184 of the E operand into it. Then apply op and write back to G. */
9188 UChar rm
= getUChar(delta
);
9189 IRTemp oldG0
= newTemp(Ity_V128
);
9190 IRTemp oldG1
= newTemp(Ity_V128
);
9192 assign( oldG0
, getXMMReg(gregOfRexRM(pfx
,rm
)) );
9194 if (epartIsReg(rm
)) {
9196 binop( Iop_SetV128lo32
,
9198 getXMMRegLane32(eregOfRexRM(pfx
,rm
), 0)) );
9199 putXMMReg( gregOfRexRM(pfx
,rm
), unop(op
, mkexpr(oldG1
)) );
9200 DIP("%s %s,%s\n", opname
,
9201 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9202 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9205 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
9207 binop( Iop_SetV128lo32
,
9209 loadLE(Ity_I32
, mkexpr(addr
)) ));
9210 putXMMReg( gregOfRexRM(pfx
,rm
), unop(op
, mkexpr(oldG1
)) );
9211 DIP("%s %s,%s\n", opname
,
9213 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9219 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */
9221 static ULong
dis_SSE_E_to_G_unary_lo64 (
9222 const VexAbiInfo
* vbi
,
9223 Prefix pfx
, Long delta
,
9224 const HChar
* opname
, IROp op
9227 /* First we need to get the old G value and patch the low 64 bits
9228 of the E operand into it. Then apply op and write back to G. */
9232 UChar rm
= getUChar(delta
);
9233 IRTemp oldG0
= newTemp(Ity_V128
);
9234 IRTemp oldG1
= newTemp(Ity_V128
);
9236 assign( oldG0
, getXMMReg(gregOfRexRM(pfx
,rm
)) );
9238 if (epartIsReg(rm
)) {
9240 binop( Iop_SetV128lo64
,
9242 getXMMRegLane64(eregOfRexRM(pfx
,rm
), 0)) );
9243 putXMMReg( gregOfRexRM(pfx
,rm
), unop(op
, mkexpr(oldG1
)) );
9244 DIP("%s %s,%s\n", opname
,
9245 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9246 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9249 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
9251 binop( Iop_SetV128lo64
,
9253 loadLE(Ity_I64
, mkexpr(addr
)) ));
9254 putXMMReg( gregOfRexRM(pfx
,rm
), unop(op
, mkexpr(oldG1
)) );
9255 DIP("%s %s,%s\n", opname
,
9257 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9263 /* SSE integer binary operation:
9264 G = G `op` E (eLeft == False)
9265 G = E `op` G (eLeft == True)
9267 static ULong
dis_SSEint_E_to_G(
9268 const VexAbiInfo
* vbi
,
9269 Prefix pfx
, Long delta
,
9270 const HChar
* opname
, IROp op
,
9277 UChar rm
= getUChar(delta
);
9278 IRExpr
* gpart
= getXMMReg(gregOfRexRM(pfx
,rm
));
9279 IRExpr
* epart
= NULL
;
9280 if (epartIsReg(rm
)) {
9281 epart
= getXMMReg(eregOfRexRM(pfx
,rm
));
9282 DIP("%s %s,%s\n", opname
,
9283 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9284 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9287 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
9288 epart
= loadLE(Ity_V128
, mkexpr(addr
));
9289 DIP("%s %s,%s\n", opname
,
9291 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9294 putXMMReg( gregOfRexRM(pfx
,rm
),
9295 eLeft
? binop(op
, epart
, gpart
)
9296 : binop(op
, gpart
, epart
) );
9301 /* Helper for doing SSE FP comparisons. False return ==> unhandled.
9302 This is all a bit of a kludge in that it ignores the subtleties of
9303 ordered-vs-unordered and signalling-vs-nonsignalling in the Intel
9304 spec. The meaning of the outputs is as follows:
9306 preZeroP: the active lanes of both incoming arguments should be set to zero
9307 before performing the operation. IOW the actual args are to be ignored
9308 and instead zero bits are to be used. This is a bit strange but is needed
9309 to make the constant-false/true variants (FALSE_OQ, TRUE_UQ, FALSE_OS,
9312 preSwapP: the args should be swapped before performing the operation. Note
9313 that zeroing arg input sections (per preZeroP) and swapping them (per
9314 preSwapP) are allowed to happen in either order; the result is the same.
9316 opP: this returns the actual comparison op to perform.
9318 postNotP: if true, the result(ing vector) of the comparison operation should
9319 be bitwise-not-ed. Note that only the lanes of the output actually
9320 computed by opP should be not-ed.
9322 static Bool
findSSECmpOp ( /*OUT*/Bool
* preZeroP
,
9323 /*OUT*/Bool
* preSwapP
,
9325 /*OUT*/Bool
* postNotP
,
9326 UInt imm8
, Bool all_lanes
, Int sz
)
9328 vassert(*preZeroP
== False
);
9329 vassert(*preSwapP
== False
);
9330 vassert(*opP
== Iop_INVALID
);
9331 vassert(*postNotP
== False
);
9333 if (imm8
>= 32) return False
;
9335 /* First, compute a (preZero, preSwap, op, postNot) quad from
9336 the supplied imm8. */
9337 Bool preZero
= False
;
9338 Bool preSwap
= False
;
9339 IROp op
= Iop_INVALID
;
9340 Bool postNot
= False
;
9342 # define XXX(_preZero, _preSwap, _op, _postNot) \
9343 { preZero = _preZero; preSwap = _preSwap; op = _op; postNot = _postNot; }
9344 // If you add a case here, add a corresponding test for both VCMPSD_128
9345 // and VCMPSS_128 in avx-1.c.
9346 // Cases 0xA and above are
9347 // "Enhanced Comparison Predicate[s] for VEX-Encoded [insns]"
9349 // "O" = ordered, "U" = unordered
9350 // "Q" = non-signalling (quiet), "S" = signalling
9352 // replace active arg lanes in operands with zero
9354 // | swap operands before applying the cmp op?
9356 // | | cmp op invert active lanes after?
9359 case 0x0: XXX(False
, False
, Iop_CmpEQ32Fx4
, False
); break; // EQ_OQ
9360 case 0x8: XXX(False
, False
, Iop_CmpEQ32Fx4
, False
); break; // EQ_UQ
9361 case 0x10: XXX(False
, False
, Iop_CmpEQ32Fx4
, False
); break; // EQ_OS
9362 case 0x18: XXX(False
, False
, Iop_CmpEQ32Fx4
, False
); break; // EQ_US
9364 case 0x1: XXX(False
, False
, Iop_CmpLT32Fx4
, False
); break; // LT_OS
9365 case 0x11: XXX(False
, False
, Iop_CmpLT32Fx4
, False
); break; // LT_OQ
9367 case 0x2: XXX(False
, False
, Iop_CmpLE32Fx4
, False
); break; // LE_OS
9368 case 0x12: XXX(False
, False
, Iop_CmpLE32Fx4
, False
); break; // LE_OQ
9370 case 0x3: XXX(False
, False
, Iop_CmpUN32Fx4
, False
); break; // UNORD_Q
9371 case 0x13: XXX(False
, False
, Iop_CmpUN32Fx4
, False
); break; // UNORD_S
9373 // 0xC: this isn't really right because it returns all-1s when
9374 // either operand is a NaN, and it should return all-0s.
9375 case 0x4: XXX(False
, False
, Iop_CmpEQ32Fx4
, True
); break; // NEQ_UQ
9376 case 0xC: XXX(False
, False
, Iop_CmpEQ32Fx4
, True
); break; // NEQ_OQ
9377 case 0x14: XXX(False
, False
, Iop_CmpEQ32Fx4
, True
); break; // NEQ_US
9378 case 0x1C: XXX(False
, False
, Iop_CmpEQ32Fx4
, True
); break; // NEQ_OS
9380 case 0x5: XXX(False
, False
, Iop_CmpLT32Fx4
, True
); break; // NLT_US
9381 case 0x15: XXX(False
, False
, Iop_CmpLT32Fx4
, True
); break; // NLT_UQ
9383 case 0x6: XXX(False
, False
, Iop_CmpLE32Fx4
, True
); break; // NLE_US
9384 case 0x16: XXX(False
, False
, Iop_CmpLE32Fx4
, True
); break; // NLE_UQ
9386 case 0x7: XXX(False
, False
, Iop_CmpUN32Fx4
, True
); break; // ORD_Q
9387 case 0x17: XXX(False
, False
, Iop_CmpUN32Fx4
, True
); break; // ORD_S
9389 case 0x9: XXX(False
, True
, Iop_CmpLE32Fx4
, True
); break; // NGE_US
9390 case 0x19: XXX(False
, True
, Iop_CmpLE32Fx4
, True
); break; // NGE_UQ
9392 case 0xA: XXX(False
, True
, Iop_CmpLT32Fx4
, True
); break; // NGT_US
9393 case 0x1A: XXX(False
, True
, Iop_CmpLT32Fx4
, True
); break; // NGT_UQ
9395 case 0xD: XXX(False
, True
, Iop_CmpLE32Fx4
, False
); break; // GE_OS
9396 case 0x1D: XXX(False
, True
, Iop_CmpLE32Fx4
, False
); break; // GE_OQ
9398 case 0xE: XXX(False
, True
, Iop_CmpLT32Fx4
, False
); break; // GT_OS
9399 case 0x1E: XXX(False
, True
, Iop_CmpLT32Fx4
, False
); break; // GT_OQ
9400 // Constant-value-result ops
9401 case 0xB: XXX(True
, False
, Iop_CmpEQ32Fx4
, True
); break; // FALSE_OQ
9402 case 0xF: XXX(True
, False
, Iop_CmpEQ32Fx4
, False
); break; // TRUE_UQ
9403 case 0x1B: XXX(True
, False
, Iop_CmpEQ32Fx4
, True
); break; // FALSE_OS
9404 case 0x1F: XXX(True
, False
, Iop_CmpEQ32Fx4
, False
); break; // TRUE_US
9405 /* Don't forget to add test cases to VCMPSS_128_<imm8> in
9406 avx-1.c if new cases turn up. */
9410 if (op
== Iop_INVALID
) return False
;
9412 /* Now convert the op into one with the same arithmetic but that is
9413 correct for the width and laneage requirements. */
9415 /**/ if (sz
== 4 && all_lanes
) {
9417 case Iop_CmpEQ32Fx4
: op
= Iop_CmpEQ32Fx4
; break;
9418 case Iop_CmpLT32Fx4
: op
= Iop_CmpLT32Fx4
; break;
9419 case Iop_CmpLE32Fx4
: op
= Iop_CmpLE32Fx4
; break;
9420 case Iop_CmpUN32Fx4
: op
= Iop_CmpUN32Fx4
; break;
9421 default: vassert(0);
9424 else if (sz
== 4 && !all_lanes
) {
9426 case Iop_CmpEQ32Fx4
: op
= Iop_CmpEQ32F0x4
; break;
9427 case Iop_CmpLT32Fx4
: op
= Iop_CmpLT32F0x4
; break;
9428 case Iop_CmpLE32Fx4
: op
= Iop_CmpLE32F0x4
; break;
9429 case Iop_CmpUN32Fx4
: op
= Iop_CmpUN32F0x4
; break;
9430 default: vassert(0);
9433 else if (sz
== 8 && all_lanes
) {
9435 case Iop_CmpEQ32Fx4
: op
= Iop_CmpEQ64Fx2
; break;
9436 case Iop_CmpLT32Fx4
: op
= Iop_CmpLT64Fx2
; break;
9437 case Iop_CmpLE32Fx4
: op
= Iop_CmpLE64Fx2
; break;
9438 case Iop_CmpUN32Fx4
: op
= Iop_CmpUN64Fx2
; break;
9439 default: vassert(0);
9442 else if (sz
== 8 && !all_lanes
) {
9444 case Iop_CmpEQ32Fx4
: op
= Iop_CmpEQ64F0x2
; break;
9445 case Iop_CmpLT32Fx4
: op
= Iop_CmpLT64F0x2
; break;
9446 case Iop_CmpLE32Fx4
: op
= Iop_CmpLE64F0x2
; break;
9447 case Iop_CmpUN32Fx4
: op
= Iop_CmpUN64F0x2
; break;
9448 default: vassert(0);
9452 vpanic("findSSECmpOp(amd64,guest)");
9456 // In this case, preSwap is irrelevant, but assert anyway.
9457 vassert(preSwap
== False
);
9459 *preZeroP
= preZero
; *preSwapP
= preSwap
; *opP
= op
; *postNotP
= postNot
;
9464 /* Handles SSE 32F/64F comparisons. It can fail, in which case it
9465 returns the original delta to indicate failure. */
9467 static Long
dis_SSE_cmp_E_to_G ( const VexAbiInfo
* vbi
,
9468 Prefix pfx
, Long delta
,
9469 const HChar
* opname
, Bool all_lanes
, Int sz
)
9471 Long delta0
= delta
;
9476 Bool preZero
= False
;
9477 Bool preSwap
= False
;
9478 IROp op
= Iop_INVALID
;
9479 Bool postNot
= False
;
9480 IRTemp plain
= newTemp(Ity_V128
);
9481 UChar rm
= getUChar(delta
);
9483 vassert(sz
== 4 || sz
== 8);
9484 if (epartIsReg(rm
)) {
9485 imm8
= getUChar(delta
+1);
9486 if (imm8
>= 8) return delta0
; /* FAIL */
9487 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
,
9488 imm8
, all_lanes
, sz
);
9489 if (!ok
) return delta0
; /* FAIL */
9490 vassert(!preZero
); /* never needed for imm8 < 8 */
9491 vassert(!preSwap
); /* never needed for imm8 < 8 */
9492 assign( plain
, binop(op
, getXMMReg(gregOfRexRM(pfx
,rm
)),
9493 getXMMReg(eregOfRexRM(pfx
,rm
))) );
9495 DIP("%s $%u,%s,%s\n", opname
,
9497 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9498 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9500 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
9501 imm8
= getUChar(delta
+alen
);
9502 if (imm8
>= 8) return delta0
; /* FAIL */
9503 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
,
9504 imm8
, all_lanes
, sz
);
9505 if (!ok
) return delta0
; /* FAIL */
9506 vassert(!preZero
); /* never needed for imm8 < 8 */
9507 vassert(!preSwap
); /* never needed for imm8 < 8 */
9511 getXMMReg(gregOfRexRM(pfx
,rm
)),
9513 ? loadLE(Ity_V128
, mkexpr(addr
))
9515 ? unop( Iop_64UtoV128
, loadLE(Ity_I64
, mkexpr(addr
)))
9517 unop( Iop_32UtoV128
, loadLE(Ity_I32
, mkexpr(addr
)))
9521 DIP("%s $%u,%s,%s\n", opname
,
9524 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9527 if (postNot
&& all_lanes
) {
9528 putXMMReg( gregOfRexRM(pfx
,rm
),
9529 unop(Iop_NotV128
, mkexpr(plain
)) );
9532 if (postNot
&& !all_lanes
) {
9533 mask
= toUShort(sz
==4 ? 0x000F : 0x00FF);
9534 putXMMReg( gregOfRexRM(pfx
,rm
),
9535 binop(Iop_XorV128
, mkexpr(plain
), mkV128(mask
)) );
9538 putXMMReg( gregOfRexRM(pfx
,rm
), mkexpr(plain
) );
9545 /* Vector by scalar shift of G by the amount specified at the bottom
9548 static ULong
dis_SSE_shiftG_byE ( const VexAbiInfo
* vbi
,
9549 Prefix pfx
, Long delta
,
9550 const HChar
* opname
, IROp op
)
9556 UChar rm
= getUChar(delta
);
9557 IRTemp g0
= newTemp(Ity_V128
);
9558 IRTemp g1
= newTemp(Ity_V128
);
9559 IRTemp amt
= newTemp(Ity_I64
);
9560 IRTemp amt8
= newTemp(Ity_I8
);
9561 if (epartIsReg(rm
)) {
9562 assign( amt
, getXMMRegLane64(eregOfRexRM(pfx
,rm
), 0) );
9563 DIP("%s %s,%s\n", opname
,
9564 nameXMMReg(eregOfRexRM(pfx
,rm
)),
9565 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9568 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
9569 assign( amt
, loadLE(Ity_I64
, mkexpr(addr
)) );
9570 DIP("%s %s,%s\n", opname
,
9572 nameXMMReg(gregOfRexRM(pfx
,rm
)) );
9575 assign( g0
, getXMMReg(gregOfRexRM(pfx
,rm
)) );
9576 assign( amt8
, unop(Iop_64to8
, mkexpr(amt
)) );
9578 shl
= shr
= sar
= False
;
9581 case Iop_ShlN16x8
: shl
= True
; size
= 32; break;
9582 case Iop_ShlN32x4
: shl
= True
; size
= 32; break;
9583 case Iop_ShlN64x2
: shl
= True
; size
= 64; break;
9584 case Iop_SarN16x8
: sar
= True
; size
= 16; break;
9585 case Iop_SarN32x4
: sar
= True
; size
= 32; break;
9586 case Iop_ShrN16x8
: shr
= True
; size
= 16; break;
9587 case Iop_ShrN32x4
: shr
= True
; size
= 32; break;
9588 case Iop_ShrN64x2
: shr
= True
; size
= 64; break;
9589 default: vassert(0);
9596 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
9597 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
9606 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
9607 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
9608 binop(op
, mkexpr(g0
), mkU8(size
-1))
9615 putXMMReg( gregOfRexRM(pfx
,rm
), mkexpr(g1
) );
9620 /* Vector by scalar shift of E by an immediate byte. */
9623 ULong
dis_SSE_shiftE_imm ( Prefix pfx
,
9624 Long delta
, const HChar
* opname
, IROp op
)
9627 UChar rm
= getUChar(delta
);
9628 IRTemp e0
= newTemp(Ity_V128
);
9629 IRTemp e1
= newTemp(Ity_V128
);
9631 vassert(epartIsReg(rm
));
9632 vassert(gregLO3ofRM(rm
) == 2
9633 || gregLO3ofRM(rm
) == 4 || gregLO3ofRM(rm
) == 6);
9634 amt
= getUChar(delta
+1);
9636 DIP("%s $%d,%s\n", opname
,
9638 nameXMMReg(eregOfRexRM(pfx
,rm
)) );
9639 assign( e0
, getXMMReg(eregOfRexRM(pfx
,rm
)) );
9641 shl
= shr
= sar
= False
;
9644 case Iop_ShlN16x8
: shl
= True
; size
= 16; break;
9645 case Iop_ShlN32x4
: shl
= True
; size
= 32; break;
9646 case Iop_ShlN64x2
: shl
= True
; size
= 64; break;
9647 case Iop_SarN16x8
: sar
= True
; size
= 16; break;
9648 case Iop_SarN32x4
: sar
= True
; size
= 32; break;
9649 case Iop_ShrN16x8
: shr
= True
; size
= 16; break;
9650 case Iop_ShrN32x4
: shr
= True
; size
= 32; break;
9651 case Iop_ShrN64x2
: shr
= True
; size
= 64; break;
9652 default: vassert(0);
9656 assign( e1
, amt
>= size
9658 : binop(op
, mkexpr(e0
), mkU8(amt
))
9662 assign( e1
, amt
>= size
9663 ? binop(op
, mkexpr(e0
), mkU8(size
-1))
9664 : binop(op
, mkexpr(e0
), mkU8(amt
))
9670 putXMMReg( eregOfRexRM(pfx
,rm
), mkexpr(e1
) );
9675 /* Get the current SSE rounding mode. */
9677 static IRExpr
* /* :: Ity_I32 */ get_sse_roundingmode ( void )
9682 IRExpr_Get( OFFB_SSEROUND
, Ity_I64
),
9686 static void put_sse_roundingmode ( IRExpr
* sseround
)
9688 vassert(typeOfIRExpr(irsb
->tyenv
, sseround
) == Ity_I32
);
9689 stmt( IRStmt_Put( OFFB_SSEROUND
,
9690 unop(Iop_32Uto64
,sseround
) ) );
9693 /* Break a V128-bit value up into four 32-bit ints. */
9695 static void breakupV128to32s ( IRTemp t128
,
9697 IRTemp
* t3
, IRTemp
* t2
,
9698 IRTemp
* t1
, IRTemp
* t0
)
9700 IRTemp hi64
= newTemp(Ity_I64
);
9701 IRTemp lo64
= newTemp(Ity_I64
);
9702 assign( hi64
, unop(Iop_V128HIto64
, mkexpr(t128
)) );
9703 assign( lo64
, unop(Iop_V128to64
, mkexpr(t128
)) );
9705 vassert(t0
&& *t0
== IRTemp_INVALID
);
9706 vassert(t1
&& *t1
== IRTemp_INVALID
);
9707 vassert(t2
&& *t2
== IRTemp_INVALID
);
9708 vassert(t3
&& *t3
== IRTemp_INVALID
);
9710 *t0
= newTemp(Ity_I32
);
9711 *t1
= newTemp(Ity_I32
);
9712 *t2
= newTemp(Ity_I32
);
9713 *t3
= newTemp(Ity_I32
);
9714 assign( *t0
, unop(Iop_64to32
, mkexpr(lo64
)) );
9715 assign( *t1
, unop(Iop_64HIto32
, mkexpr(lo64
)) );
9716 assign( *t2
, unop(Iop_64to32
, mkexpr(hi64
)) );
9717 assign( *t3
, unop(Iop_64HIto32
, mkexpr(hi64
)) );
9720 /* Construct a V128-bit value from four 32-bit ints. */
9722 static IRExpr
* mkV128from32s ( IRTemp t3
, IRTemp t2
,
9723 IRTemp t1
, IRTemp t0
)
9726 binop( Iop_64HLtoV128
,
9727 binop(Iop_32HLto64
, mkexpr(t3
), mkexpr(t2
)),
9728 binop(Iop_32HLto64
, mkexpr(t1
), mkexpr(t0
))
9732 /* Break a 64-bit value up into four 16-bit ints. */
9734 static void breakup64to16s ( IRTemp t64
,
9736 IRTemp
* t3
, IRTemp
* t2
,
9737 IRTemp
* t1
, IRTemp
* t0
)
9739 IRTemp hi32
= newTemp(Ity_I32
);
9740 IRTemp lo32
= newTemp(Ity_I32
);
9741 assign( hi32
, unop(Iop_64HIto32
, mkexpr(t64
)) );
9742 assign( lo32
, unop(Iop_64to32
, mkexpr(t64
)) );
9744 vassert(t0
&& *t0
== IRTemp_INVALID
);
9745 vassert(t1
&& *t1
== IRTemp_INVALID
);
9746 vassert(t2
&& *t2
== IRTemp_INVALID
);
9747 vassert(t3
&& *t3
== IRTemp_INVALID
);
9749 *t0
= newTemp(Ity_I16
);
9750 *t1
= newTemp(Ity_I16
);
9751 *t2
= newTemp(Ity_I16
);
9752 *t3
= newTemp(Ity_I16
);
9753 assign( *t0
, unop(Iop_32to16
, mkexpr(lo32
)) );
9754 assign( *t1
, unop(Iop_32HIto16
, mkexpr(lo32
)) );
9755 assign( *t2
, unop(Iop_32to16
, mkexpr(hi32
)) );
9756 assign( *t3
, unop(Iop_32HIto16
, mkexpr(hi32
)) );
9759 /* Construct a 64-bit value from four 16-bit ints. */
9761 static IRExpr
* mk64from16s ( IRTemp t3
, IRTemp t2
,
9762 IRTemp t1
, IRTemp t0
)
9765 binop( Iop_32HLto64
,
9766 binop(Iop_16HLto32
, mkexpr(t3
), mkexpr(t2
)),
9767 binop(Iop_16HLto32
, mkexpr(t1
), mkexpr(t0
))
9771 /* Break a V256-bit value up into four 64-bit ints. */
9773 static void breakupV256to64s ( IRTemp t256
,
9775 IRTemp
* t3
, IRTemp
* t2
,
9776 IRTemp
* t1
, IRTemp
* t0
)
9778 vassert(t0
&& *t0
== IRTemp_INVALID
);
9779 vassert(t1
&& *t1
== IRTemp_INVALID
);
9780 vassert(t2
&& *t2
== IRTemp_INVALID
);
9781 vassert(t3
&& *t3
== IRTemp_INVALID
);
9782 *t0
= newTemp(Ity_I64
);
9783 *t1
= newTemp(Ity_I64
);
9784 *t2
= newTemp(Ity_I64
);
9785 *t3
= newTemp(Ity_I64
);
9786 assign( *t0
, unop(Iop_V256to64_0
, mkexpr(t256
)) );
9787 assign( *t1
, unop(Iop_V256to64_1
, mkexpr(t256
)) );
9788 assign( *t2
, unop(Iop_V256to64_2
, mkexpr(t256
)) );
9789 assign( *t3
, unop(Iop_V256to64_3
, mkexpr(t256
)) );
9792 /* Break a V256-bit value up into two V128s. */
9794 static void breakupV256toV128s ( IRTemp t256
,
9796 IRTemp
* t1
, IRTemp
* t0
)
9798 vassert(t0
&& *t0
== IRTemp_INVALID
);
9799 vassert(t1
&& *t1
== IRTemp_INVALID
);
9800 *t0
= newTemp(Ity_V128
);
9801 *t1
= newTemp(Ity_V128
);
9802 assign(*t1
, unop(Iop_V256toV128_1
, mkexpr(t256
)));
9803 assign(*t0
, unop(Iop_V256toV128_0
, mkexpr(t256
)));
9806 /* Break a V256-bit value up into eight 32-bit ints. */
9808 static void breakupV256to32s ( IRTemp t256
,
9810 IRTemp
* t7
, IRTemp
* t6
,
9811 IRTemp
* t5
, IRTemp
* t4
,
9812 IRTemp
* t3
, IRTemp
* t2
,
9813 IRTemp
* t1
, IRTemp
* t0
)
9815 IRTemp t128_1
= IRTemp_INVALID
;
9816 IRTemp t128_0
= IRTemp_INVALID
;
9817 breakupV256toV128s( t256
, &t128_1
, &t128_0
);
9818 breakupV128to32s( t128_1
, t7
, t6
, t5
, t4
);
9819 breakupV128to32s( t128_0
, t3
, t2
, t1
, t0
);
9822 /* Break a V128-bit value up into two 64-bit ints. */
9824 static void breakupV128to64s ( IRTemp t128
,
9826 IRTemp
* t1
, IRTemp
* t0
)
9828 vassert(t0
&& *t0
== IRTemp_INVALID
);
9829 vassert(t1
&& *t1
== IRTemp_INVALID
);
9830 *t0
= newTemp(Ity_I64
);
9831 *t1
= newTemp(Ity_I64
);
9832 assign( *t0
, unop(Iop_V128to64
, mkexpr(t128
)) );
9833 assign( *t1
, unop(Iop_V128HIto64
, mkexpr(t128
)) );
9836 /* Construct a V256-bit value from eight 32-bit ints. */
9838 static IRExpr
* mkV256from32s ( IRTemp t7
, IRTemp t6
,
9839 IRTemp t5
, IRTemp t4
,
9840 IRTemp t3
, IRTemp t2
,
9841 IRTemp t1
, IRTemp t0
)
9844 binop( Iop_V128HLtoV256
,
9845 binop( Iop_64HLtoV128
,
9846 binop(Iop_32HLto64
, mkexpr(t7
), mkexpr(t6
)),
9847 binop(Iop_32HLto64
, mkexpr(t5
), mkexpr(t4
)) ),
9848 binop( Iop_64HLtoV128
,
9849 binop(Iop_32HLto64
, mkexpr(t3
), mkexpr(t2
)),
9850 binop(Iop_32HLto64
, mkexpr(t1
), mkexpr(t0
)) )
9854 /* Construct a V256-bit value from four 64-bit ints. */
9856 static IRExpr
* mkV256from64s ( IRTemp t3
, IRTemp t2
,
9857 IRTemp t1
, IRTemp t0
)
9860 binop( Iop_V128HLtoV256
,
9861 binop(Iop_64HLtoV128
, mkexpr(t3
), mkexpr(t2
)),
9862 binop(Iop_64HLtoV128
, mkexpr(t1
), mkexpr(t0
))
9866 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
9867 values (aa,bb), computes, for each of the 4 16-bit lanes:
9869 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
9871 static IRExpr
* dis_PMULHRSW_helper ( IRExpr
* aax
, IRExpr
* bbx
)
9873 IRTemp aa
= newTemp(Ity_I64
);
9874 IRTemp bb
= newTemp(Ity_I64
);
9875 IRTemp aahi32s
= newTemp(Ity_I64
);
9876 IRTemp aalo32s
= newTemp(Ity_I64
);
9877 IRTemp bbhi32s
= newTemp(Ity_I64
);
9878 IRTemp bblo32s
= newTemp(Ity_I64
);
9879 IRTemp rHi
= newTemp(Ity_I64
);
9880 IRTemp rLo
= newTemp(Ity_I64
);
9881 IRTemp one32x2
= newTemp(Ity_I64
);
9886 binop(Iop_InterleaveHI16x4
, mkexpr(aa
), mkexpr(aa
)),
9890 binop(Iop_InterleaveLO16x4
, mkexpr(aa
), mkexpr(aa
)),
9894 binop(Iop_InterleaveHI16x4
, mkexpr(bb
), mkexpr(bb
)),
9898 binop(Iop_InterleaveLO16x4
, mkexpr(bb
), mkexpr(bb
)),
9900 assign(one32x2
, mkU64( (1ULL << 32) + 1 ));
9909 binop(Iop_Mul32x2
, mkexpr(aahi32s
), mkexpr(bbhi32s
)),
9925 binop(Iop_Mul32x2
, mkexpr(aalo32s
), mkexpr(bblo32s
)),
9934 binop(Iop_CatEvenLanes16x4
, mkexpr(rHi
), mkexpr(rLo
));
9937 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
9938 values (aa,bb), computes, for each lane:
9940 if aa_lane < 0 then - bb_lane
9941 else if aa_lane > 0 then bb_lane
9944 static IRExpr
* dis_PSIGN_helper ( IRExpr
* aax
, IRExpr
* bbx
, Int laneszB
)
9946 IRTemp aa
= newTemp(Ity_I64
);
9947 IRTemp bb
= newTemp(Ity_I64
);
9948 IRTemp zero
= newTemp(Ity_I64
);
9949 IRTemp bbNeg
= newTemp(Ity_I64
);
9950 IRTemp negMask
= newTemp(Ity_I64
);
9951 IRTemp posMask
= newTemp(Ity_I64
);
9952 IROp opSub
= Iop_INVALID
;
9953 IROp opCmpGTS
= Iop_INVALID
;
9956 case 1: opSub
= Iop_Sub8x8
; opCmpGTS
= Iop_CmpGT8Sx8
; break;
9957 case 2: opSub
= Iop_Sub16x4
; opCmpGTS
= Iop_CmpGT16Sx4
; break;
9958 case 4: opSub
= Iop_Sub32x2
; opCmpGTS
= Iop_CmpGT32Sx2
; break;
9959 default: vassert(0);
9964 assign( zero
, mkU64(0) );
9965 assign( bbNeg
, binop(opSub
, mkexpr(zero
), mkexpr(bb
)) );
9966 assign( negMask
, binop(opCmpGTS
, mkexpr(zero
), mkexpr(aa
)) );
9967 assign( posMask
, binop(opCmpGTS
, mkexpr(aa
), mkexpr(zero
)) );
9971 binop(Iop_And64
, mkexpr(bb
), mkexpr(posMask
)),
9972 binop(Iop_And64
, mkexpr(bbNeg
), mkexpr(negMask
)) );
9977 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
9978 value aa, computes, for each lane
9980 if aa < 0 then -aa else aa
9982 Note that the result is interpreted as unsigned, so that the
9983 absolute value of the most negative signed input can be
9986 static IRTemp
math_PABS_MMX ( IRTemp aa
, Int laneszB
)
9988 IRTemp res
= newTemp(Ity_I64
);
9989 IRTemp zero
= newTemp(Ity_I64
);
9990 IRTemp aaNeg
= newTemp(Ity_I64
);
9991 IRTemp negMask
= newTemp(Ity_I64
);
9992 IRTemp posMask
= newTemp(Ity_I64
);
9993 IROp opSub
= Iop_INVALID
;
9994 IROp opSarN
= Iop_INVALID
;
9997 case 1: opSub
= Iop_Sub8x8
; opSarN
= Iop_SarN8x8
; break;
9998 case 2: opSub
= Iop_Sub16x4
; opSarN
= Iop_SarN16x4
; break;
9999 case 4: opSub
= Iop_Sub32x2
; opSarN
= Iop_SarN32x2
; break;
10000 default: vassert(0);
10003 assign( negMask
, binop(opSarN
, mkexpr(aa
), mkU8(8*laneszB
-1)) );
10004 assign( posMask
, unop(Iop_Not64
, mkexpr(negMask
)) );
10005 assign( zero
, mkU64(0) );
10006 assign( aaNeg
, binop(opSub
, mkexpr(zero
), mkexpr(aa
)) );
10009 binop(Iop_And64
, mkexpr(aa
), mkexpr(posMask
)),
10010 binop(Iop_And64
, mkexpr(aaNeg
), mkexpr(negMask
)) ));
10014 /* XMM version of math_PABS_MMX. */
10015 static IRTemp
math_PABS_XMM ( IRTemp aa
, Int laneszB
)
10017 IRTemp res
= newTemp(Ity_V128
);
10018 IRTemp aaHi
= newTemp(Ity_I64
);
10019 IRTemp aaLo
= newTemp(Ity_I64
);
10020 assign(aaHi
, unop(Iop_V128HIto64
, mkexpr(aa
)));
10021 assign(aaLo
, unop(Iop_V128to64
, mkexpr(aa
)));
10022 assign(res
, binop(Iop_64HLtoV128
,
10023 mkexpr(math_PABS_MMX(aaHi
, laneszB
)),
10024 mkexpr(math_PABS_MMX(aaLo
, laneszB
))));
10028 /* Specialisations of math_PABS_XMM, since there's no easy way to do
10029 partial applications in C :-( */
10030 static IRTemp
math_PABS_XMM_pap4 ( IRTemp aa
) {
10031 return math_PABS_XMM(aa
, 4);
10034 static IRTemp
math_PABS_XMM_pap2 ( IRTemp aa
) {
10035 return math_PABS_XMM(aa
, 2);
10038 static IRTemp
math_PABS_XMM_pap1 ( IRTemp aa
) {
10039 return math_PABS_XMM(aa
, 1);
10042 /* YMM version of math_PABS_XMM. */
10043 static IRTemp
math_PABS_YMM ( IRTemp aa
, Int laneszB
)
10045 IRTemp res
= newTemp(Ity_V256
);
10046 IRTemp aaHi
= IRTemp_INVALID
;
10047 IRTemp aaLo
= IRTemp_INVALID
;
10048 breakupV256toV128s(aa
, &aaHi
, &aaLo
);
10049 assign(res
, binop(Iop_V128HLtoV256
,
10050 mkexpr(math_PABS_XMM(aaHi
, laneszB
)),
10051 mkexpr(math_PABS_XMM(aaLo
, laneszB
))));
10055 static IRTemp
math_PABS_YMM_pap4 ( IRTemp aa
) {
10056 return math_PABS_YMM(aa
, 4);
10059 static IRTemp
math_PABS_YMM_pap2 ( IRTemp aa
) {
10060 return math_PABS_YMM(aa
, 2);
10063 static IRTemp
math_PABS_YMM_pap1 ( IRTemp aa
) {
10064 return math_PABS_YMM(aa
, 1);
10067 static IRExpr
* dis_PALIGNR_XMM_helper ( IRTemp hi64
,
10068 IRTemp lo64
, Long byteShift
)
10070 vassert(byteShift
>= 1 && byteShift
<= 7);
10073 binop(Iop_Shl64
, mkexpr(hi64
), mkU8(8*(8-byteShift
))),
10074 binop(Iop_Shr64
, mkexpr(lo64
), mkU8(8*byteShift
))
10078 static IRTemp
math_PALIGNR_XMM ( IRTemp sV
, IRTemp dV
, UInt imm8
)
10080 IRTemp res
= newTemp(Ity_V128
);
10081 IRTemp sHi
= newTemp(Ity_I64
);
10082 IRTemp sLo
= newTemp(Ity_I64
);
10083 IRTemp dHi
= newTemp(Ity_I64
);
10084 IRTemp dLo
= newTemp(Ity_I64
);
10085 IRTemp rHi
= newTemp(Ity_I64
);
10086 IRTemp rLo
= newTemp(Ity_I64
);
10088 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
10089 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
10090 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
10091 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
10094 assign( rHi
, mkexpr(sHi
) );
10095 assign( rLo
, mkexpr(sLo
) );
10097 else if (imm8
>= 1 && imm8
<= 7) {
10098 assign( rHi
, dis_PALIGNR_XMM_helper(dLo
, sHi
, imm8
) );
10099 assign( rLo
, dis_PALIGNR_XMM_helper(sHi
, sLo
, imm8
) );
10101 else if (imm8
== 8) {
10102 assign( rHi
, mkexpr(dLo
) );
10103 assign( rLo
, mkexpr(sHi
) );
10105 else if (imm8
>= 9 && imm8
<= 15) {
10106 assign( rHi
, dis_PALIGNR_XMM_helper(dHi
, dLo
, imm8
-8) );
10107 assign( rLo
, dis_PALIGNR_XMM_helper(dLo
, sHi
, imm8
-8) );
10109 else if (imm8
== 16) {
10110 assign( rHi
, mkexpr(dHi
) );
10111 assign( rLo
, mkexpr(dLo
) );
10113 else if (imm8
>= 17 && imm8
<= 23) {
10114 assign( rHi
, binop(Iop_Shr64
, mkexpr(dHi
), mkU8(8*(imm8
-16))) );
10115 assign( rLo
, dis_PALIGNR_XMM_helper(dHi
, dLo
, imm8
-16) );
10117 else if (imm8
== 24) {
10118 assign( rHi
, mkU64(0) );
10119 assign( rLo
, mkexpr(dHi
) );
10121 else if (imm8
>= 25 && imm8
<= 31) {
10122 assign( rHi
, mkU64(0) );
10123 assign( rLo
, binop(Iop_Shr64
, mkexpr(dHi
), mkU8(8*(imm8
-24))) );
10125 else if (imm8
>= 32 && imm8
<= 255) {
10126 assign( rHi
, mkU64(0) );
10127 assign( rLo
, mkU64(0) );
10132 assign( res
, binop(Iop_64HLtoV128
, mkexpr(rHi
), mkexpr(rLo
)));
10136 /* Generate a SIGSEGV followed by a restart of the current instruction
10137 if effective_addr is not 16-aligned. This is required behaviour
10138 for some SSE3 instructions and all 128-bit SSSE3 instructions.
10139 This assumes that guest_RIP_curr_instr is set correctly!
10140 On FreeBSD, this kind of error generates a SIGBUS. */
10142 void gen_SIGNAL_if_not_XX_aligned ( const VexAbiInfo
* vbi
,
10143 IRTemp effective_addr
, ULong mask
)
10148 binop(Iop_And64
,mkexpr(effective_addr
),mkU64(mask
)),
10150 vbi
->guest_amd64_sigbus_on_misalign
? Ijk_SigBUS
: Ijk_SigSEGV
,
10151 IRConst_U64(guest_RIP_curr_instr
),
10157 static void gen_SIGNAL_if_not_16_aligned ( const VexAbiInfo
* vbi
,
10158 IRTemp effective_addr
) {
10159 gen_SIGNAL_if_not_XX_aligned(vbi
, effective_addr
, 16-1);
10162 static void gen_SIGNAL_if_not_32_aligned ( const VexAbiInfo
* vbi
,
10163 IRTemp effective_addr
) {
10164 gen_SIGNAL_if_not_XX_aligned(vbi
, effective_addr
, 32-1);
10167 static void gen_SIGNAL_if_not_64_aligned ( const VexAbiInfo
* vbi
,
10168 IRTemp effective_addr
) {
10169 gen_SIGNAL_if_not_XX_aligned(vbi
, effective_addr
, 64-1);
10173 /* Helper for deciding whether a given insn (starting at the opcode
10174 byte) may validly be used with a LOCK prefix. The following insns
10175 may be used with LOCK when their destination operand is in memory.
10176 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
10178 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
10179 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
10180 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
10181 SBB 81 /3, 81 /3, 82 /x, 83 /3, 18, 19
10182 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
10183 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
10184 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
10194 BTC 0F BB, 0F BA /7
10195 BTR 0F B3, 0F BA /6
10196 BTS 0F AB, 0F BA /5
10198 CMPXCHG 0F B0, 0F B1
10203 ------------------------------
10205 80 /0 = addb $imm8, rm8
10206 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
10207 82 /0 = addb $imm8, rm8
10208 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
10211 01 = addl r32, rm32 and addw r16, rm16
10213 Same for ADD OR ADC SBB AND SUB XOR
10216 FF /1 = dec rm32 and dec rm16
10219 FF /0 = inc rm32 and inc rm16
10222 F7 /3 = neg rm32 and neg rm16
10225 F7 /2 = not rm32 and not rm16
10227 0F BB = btcw r16, rm16 and btcl r32, rm32
10228 OF BA /7 = btcw $imm8, rm16 and btcw $imm8, rm32
10232 static Bool
can_be_used_with_LOCK_prefix ( const UChar
* opc
)
10235 case 0x00: case 0x01: case 0x08: case 0x09:
10236 case 0x10: case 0x11: case 0x18: case 0x19:
10237 case 0x20: case 0x21: case 0x28: case 0x29:
10238 case 0x30: case 0x31:
10239 if (!epartIsReg(opc
[1]))
10243 case 0x80: case 0x81: case 0x82: case 0x83:
10244 if (gregLO3ofRM(opc
[1]) >= 0 && gregLO3ofRM(opc
[1]) <= 6
10245 && !epartIsReg(opc
[1]))
10249 case 0xFE: case 0xFF:
10250 if (gregLO3ofRM(opc
[1]) >= 0 && gregLO3ofRM(opc
[1]) <= 1
10251 && !epartIsReg(opc
[1]))
10255 case 0xF6: case 0xF7:
10256 if (gregLO3ofRM(opc
[1]) >= 2 && gregLO3ofRM(opc
[1]) <= 3
10257 && !epartIsReg(opc
[1]))
10261 case 0x86: case 0x87:
10262 if (!epartIsReg(opc
[1]))
10268 case 0xBB: case 0xB3: case 0xAB:
10269 if (!epartIsReg(opc
[2]))
10273 if (gregLO3ofRM(opc
[2]) >= 5 && gregLO3ofRM(opc
[2]) <= 7
10274 && !epartIsReg(opc
[2]))
10277 case 0xB0: case 0xB1:
10278 if (!epartIsReg(opc
[2]))
10282 if (gregLO3ofRM(opc
[2]) == 1 && !epartIsReg(opc
[2]) )
10285 case 0xC0: case 0xC1:
10286 if (!epartIsReg(opc
[2]))
10291 } /* switch (opc[1]) */
10297 } /* switch (opc[0]) */
10303 /*------------------------------------------------------------*/
10305 /*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2 ---*/
10307 /*------------------------------------------------------------*/
10309 static Long
dis_COMISD ( const VexAbiInfo
* vbi
, Prefix pfx
,
10310 Long delta
, Bool isAvx
, UChar opc
)
10312 vassert(opc
== 0x2F/*COMISD*/ || opc
== 0x2E/*UCOMISD*/);
10315 IRTemp argL
= newTemp(Ity_F64
);
10316 IRTemp argR
= newTemp(Ity_F64
);
10317 UChar modrm
= getUChar(delta
);
10318 IRTemp addr
= IRTemp_INVALID
;
10319 if (epartIsReg(modrm
)) {
10320 assign( argR
, getXMMRegLane64F( eregOfRexRM(pfx
,modrm
),
10321 0/*lowest lane*/ ) );
10323 DIP("%s%scomisd %s,%s\n", isAvx
? "v" : "",
10324 opc
==0x2E ? "u" : "",
10325 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
10326 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
10328 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10329 assign( argR
, loadLE(Ity_F64
, mkexpr(addr
)) );
10331 DIP("%s%scomisd %s,%s\n", isAvx
? "v" : "",
10332 opc
==0x2E ? "u" : "",
10334 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
10336 assign( argL
, getXMMRegLane64F( gregOfRexRM(pfx
,modrm
),
10337 0/*lowest lane*/ ) );
10339 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
10340 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
10345 binop(Iop_CmpF64
, mkexpr(argL
), mkexpr(argR
)) ),
10348 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
10353 static Long
dis_COMISS ( const VexAbiInfo
* vbi
, Prefix pfx
,
10354 Long delta
, Bool isAvx
, UChar opc
)
10356 vassert(opc
== 0x2F/*COMISS*/ || opc
== 0x2E/*UCOMISS*/);
10359 IRTemp argL
= newTemp(Ity_F32
);
10360 IRTemp argR
= newTemp(Ity_F32
);
10361 UChar modrm
= getUChar(delta
);
10362 IRTemp addr
= IRTemp_INVALID
;
10363 if (epartIsReg(modrm
)) {
10364 assign( argR
, getXMMRegLane32F( eregOfRexRM(pfx
,modrm
),
10365 0/*lowest lane*/ ) );
10367 DIP("%s%scomiss %s,%s\n", isAvx
? "v" : "",
10368 opc
==0x2E ? "u" : "",
10369 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
10370 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
10372 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10373 assign( argR
, loadLE(Ity_F32
, mkexpr(addr
)) );
10375 DIP("%s%scomiss %s,%s\n", isAvx
? "v" : "",
10376 opc
==0x2E ? "u" : "",
10378 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
10380 assign( argL
, getXMMRegLane32F( gregOfRexRM(pfx
,modrm
),
10381 0/*lowest lane*/ ) );
10383 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
10384 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
10390 unop(Iop_F32toF64
,mkexpr(argL
)),
10391 unop(Iop_F32toF64
,mkexpr(argR
)))),
10394 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
10399 static Long
dis_PSHUFD_32x4 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10400 Long delta
, Bool writesYmm
)
10405 IRTemp sV
= newTemp(Ity_V128
);
10406 UChar modrm
= getUChar(delta
);
10407 const HChar
* strV
= writesYmm
? "v" : "";
10408 IRTemp addr
= IRTemp_INVALID
;
10409 if (epartIsReg(modrm
)) {
10410 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
10411 order
= (Int
)getUChar(delta
+1);
10413 DIP("%spshufd $%d,%s,%s\n", strV
, order
,
10414 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
10415 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
10417 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
,
10418 1/*byte after the amode*/ );
10419 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10420 order
= (Int
)getUChar(delta
+alen
);
10422 DIP("%spshufd $%d,%s,%s\n", strV
, order
,
10424 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
10427 IRTemp s3
, s2
, s1
, s0
;
10428 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
10429 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
10431 # define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10432 IRTemp dV
= newTemp(Ity_V128
);
10434 mkV128from32s( SEL((order
>>6)&3), SEL((order
>>4)&3),
10435 SEL((order
>>2)&3), SEL((order
>>0)&3) )
10439 (writesYmm
? putYMMRegLoAndZU
: putXMMReg
)
10440 (gregOfRexRM(pfx
,modrm
), mkexpr(dV
));
10445 static Long
dis_PSHUFD_32x8 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
)
10450 IRTemp sV
= newTemp(Ity_V256
);
10451 UChar modrm
= getUChar(delta
);
10452 IRTemp addr
= IRTemp_INVALID
;
10453 UInt rG
= gregOfRexRM(pfx
,modrm
);
10454 if (epartIsReg(modrm
)) {
10455 UInt rE
= eregOfRexRM(pfx
,modrm
);
10456 assign( sV
, getYMMReg(rE
) );
10457 order
= (Int
)getUChar(delta
+1);
10459 DIP("vpshufd $%d,%s,%s\n", order
, nameYMMReg(rE
), nameYMMReg(rG
));
10461 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
,
10462 1/*byte after the amode*/ );
10463 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
10464 order
= (Int
)getUChar(delta
+alen
);
10466 DIP("vpshufd $%d,%s,%s\n", order
, dis_buf
, nameYMMReg(rG
));
10470 s
[7] = s
[6] = s
[5] = s
[4] = s
[3] = s
[2] = s
[1] = s
[0] = IRTemp_INVALID
;
10471 breakupV256to32s( sV
, &s
[7], &s
[6], &s
[5], &s
[4],
10472 &s
[3], &s
[2], &s
[1], &s
[0] );
10474 putYMMReg( rG
, mkV256from32s( s
[4 + ((order
>>6)&3)],
10475 s
[4 + ((order
>>4)&3)],
10476 s
[4 + ((order
>>2)&3)],
10477 s
[4 + ((order
>>0)&3)],
10478 s
[0 + ((order
>>6)&3)],
10479 s
[0 + ((order
>>4)&3)],
10480 s
[0 + ((order
>>2)&3)],
10481 s
[0 + ((order
>>0)&3)] ) );
10486 static IRTemp
math_PSRLDQ ( IRTemp sV
, Int imm
)
10488 IRTemp dV
= newTemp(Ity_V128
);
10489 IRTemp hi64
= newTemp(Ity_I64
);
10490 IRTemp lo64
= newTemp(Ity_I64
);
10491 IRTemp hi64r
= newTemp(Ity_I64
);
10492 IRTemp lo64r
= newTemp(Ity_I64
);
10494 vassert(imm
>= 0 && imm
<= 255);
10496 assign(dV
, mkV128(0x0000));
10500 assign( hi64
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
10501 assign( lo64
, unop(Iop_V128to64
, mkexpr(sV
)) );
10504 assign( lo64r
, mkexpr(lo64
) );
10505 assign( hi64r
, mkexpr(hi64
) );
10509 assign( hi64r
, mkU64(0) );
10510 assign( lo64r
, mkexpr(hi64
) );
10514 assign( hi64r
, mkU64(0) );
10515 assign( lo64r
, binop( Iop_Shr64
, mkexpr(hi64
), mkU8( 8*(imm
-8) ) ));
10517 assign( hi64r
, binop( Iop_Shr64
, mkexpr(hi64
), mkU8(8 * imm
) ));
10520 binop(Iop_Shr64
, mkexpr(lo64
),
10522 binop(Iop_Shl64
, mkexpr(hi64
),
10523 mkU8(8 * (8 - imm
)) )
10528 assign( dV
, binop(Iop_64HLtoV128
, mkexpr(hi64r
), mkexpr(lo64r
)) );
10533 static IRTemp
math_PSLLDQ ( IRTemp sV
, Int imm
)
10535 IRTemp dV
= newTemp(Ity_V128
);
10536 IRTemp hi64
= newTemp(Ity_I64
);
10537 IRTemp lo64
= newTemp(Ity_I64
);
10538 IRTemp hi64r
= newTemp(Ity_I64
);
10539 IRTemp lo64r
= newTemp(Ity_I64
);
10541 vassert(imm
>= 0 && imm
<= 255);
10543 assign(dV
, mkV128(0x0000));
10547 assign( hi64
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
10548 assign( lo64
, unop(Iop_V128to64
, mkexpr(sV
)) );
10551 assign( lo64r
, mkexpr(lo64
) );
10552 assign( hi64r
, mkexpr(hi64
) );
10556 assign( lo64r
, mkU64(0) );
10557 assign( hi64r
, mkexpr(lo64
) );
10561 assign( lo64r
, mkU64(0) );
10562 assign( hi64r
, binop( Iop_Shl64
, mkexpr(lo64
), mkU8( 8*(imm
-8) ) ));
10564 assign( lo64r
, binop( Iop_Shl64
, mkexpr(lo64
), mkU8(8 * imm
) ));
10567 binop(Iop_Shl64
, mkexpr(hi64
),
10569 binop(Iop_Shr64
, mkexpr(lo64
),
10570 mkU8(8 * (8 - imm
)) )
10575 assign( dV
, binop(Iop_64HLtoV128
, mkexpr(hi64r
), mkexpr(lo64r
)) );
10580 static Long
dis_CVTxSD2SI ( const VexAbiInfo
* vbi
, Prefix pfx
,
10581 Long delta
, Bool isAvx
, UChar opc
, Int sz
)
10583 vassert(opc
== 0x2D/*CVTSD2SI*/ || opc
== 0x2C/*CVTTSD2SI*/);
10586 UChar modrm
= getUChar(delta
);
10587 IRTemp addr
= IRTemp_INVALID
;
10588 IRTemp rmode
= newTemp(Ity_I32
);
10589 IRTemp f64lo
= newTemp(Ity_F64
);
10590 Bool r2zero
= toBool(opc
== 0x2C);
10592 if (epartIsReg(modrm
)) {
10594 assign(f64lo
, getXMMRegLane64F(eregOfRexRM(pfx
,modrm
), 0));
10595 DIP("%scvt%ssd2si %s,%s\n", isAvx
? "v" : "", r2zero
? "t" : "",
10596 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
10597 nameIReg(sz
, gregOfRexRM(pfx
,modrm
),
10600 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10601 assign(f64lo
, loadLE(Ity_F64
, mkexpr(addr
)));
10603 DIP("%scvt%ssd2si %s,%s\n", isAvx
? "v" : "", r2zero
? "t" : "",
10605 nameIReg(sz
, gregOfRexRM(pfx
,modrm
),
10610 assign( rmode
, mkU32((UInt
)Irrm_ZERO
) );
10612 assign( rmode
, get_sse_roundingmode() );
10616 putIReg32( gregOfRexRM(pfx
,modrm
),
10617 binop( Iop_F64toI32S
, mkexpr(rmode
), mkexpr(f64lo
)) );
10620 putIReg64( gregOfRexRM(pfx
,modrm
),
10621 binop( Iop_F64toI64S
, mkexpr(rmode
), mkexpr(f64lo
)) );
10628 static Long
dis_CVTxSS2SI ( const VexAbiInfo
* vbi
, Prefix pfx
,
10629 Long delta
, Bool isAvx
, UChar opc
, Int sz
)
10631 vassert(opc
== 0x2D/*CVTSS2SI*/ || opc
== 0x2C/*CVTTSS2SI*/);
10634 UChar modrm
= getUChar(delta
);
10635 IRTemp addr
= IRTemp_INVALID
;
10636 IRTemp rmode
= newTemp(Ity_I32
);
10637 IRTemp f32lo
= newTemp(Ity_F32
);
10638 Bool r2zero
= toBool(opc
== 0x2C);
10640 if (epartIsReg(modrm
)) {
10642 assign(f32lo
, getXMMRegLane32F(eregOfRexRM(pfx
,modrm
), 0));
10643 DIP("%scvt%sss2si %s,%s\n", isAvx
? "v" : "", r2zero
? "t" : "",
10644 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
10645 nameIReg(sz
, gregOfRexRM(pfx
,modrm
),
10648 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10649 assign(f32lo
, loadLE(Ity_F32
, mkexpr(addr
)));
10651 DIP("%scvt%sss2si %s,%s\n", isAvx
? "v" : "", r2zero
? "t" : "",
10653 nameIReg(sz
, gregOfRexRM(pfx
,modrm
),
10658 assign( rmode
, mkU32((UInt
)Irrm_ZERO
) );
10660 assign( rmode
, get_sse_roundingmode() );
10664 putIReg32( gregOfRexRM(pfx
,modrm
),
10665 binop( Iop_F64toI32S
,
10667 unop(Iop_F32toF64
, mkexpr(f32lo
))) );
10670 putIReg64( gregOfRexRM(pfx
,modrm
),
10671 binop( Iop_F64toI64S
,
10673 unop(Iop_F32toF64
, mkexpr(f32lo
))) );
10680 static Long
dis_CVTPS2PD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10681 Long delta
, Bool isAvx
)
10683 IRTemp addr
= IRTemp_INVALID
;
10686 IRTemp f32lo
= newTemp(Ity_F32
);
10687 IRTemp f32hi
= newTemp(Ity_F32
);
10688 UChar modrm
= getUChar(delta
);
10689 UInt rG
= gregOfRexRM(pfx
,modrm
);
10690 if (epartIsReg(modrm
)) {
10691 UInt rE
= eregOfRexRM(pfx
,modrm
);
10692 assign( f32lo
, getXMMRegLane32F(rE
, 0) );
10693 assign( f32hi
, getXMMRegLane32F(rE
, 1) );
10695 DIP("%scvtps2pd %s,%s\n",
10696 isAvx
? "v" : "", nameXMMReg(rE
), nameXMMReg(rG
));
10698 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10699 assign( f32lo
, loadLE(Ity_F32
, mkexpr(addr
)) );
10700 assign( f32hi
, loadLE(Ity_F32
,
10701 binop(Iop_Add64
,mkexpr(addr
),mkU64(4))) );
10703 DIP("%scvtps2pd %s,%s\n",
10704 isAvx
? "v" : "", dis_buf
, nameXMMReg(rG
));
10707 putXMMRegLane64F( rG
, 1, unop(Iop_F32toF64
, mkexpr(f32hi
)) );
10708 putXMMRegLane64F( rG
, 0, unop(Iop_F32toF64
, mkexpr(f32lo
)) );
10710 putYMMRegLane128( rG
, 1, mkV128(0));
10715 static Long
dis_CVTPS2PD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10718 IRTemp addr
= IRTemp_INVALID
;
10721 IRTemp f32_0
= newTemp(Ity_F32
);
10722 IRTemp f32_1
= newTemp(Ity_F32
);
10723 IRTemp f32_2
= newTemp(Ity_F32
);
10724 IRTemp f32_3
= newTemp(Ity_F32
);
10725 UChar modrm
= getUChar(delta
);
10726 UInt rG
= gregOfRexRM(pfx
,modrm
);
10727 if (epartIsReg(modrm
)) {
10728 UInt rE
= eregOfRexRM(pfx
,modrm
);
10729 assign( f32_0
, getXMMRegLane32F(rE
, 0) );
10730 assign( f32_1
, getXMMRegLane32F(rE
, 1) );
10731 assign( f32_2
, getXMMRegLane32F(rE
, 2) );
10732 assign( f32_3
, getXMMRegLane32F(rE
, 3) );
10734 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
10736 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10737 assign( f32_0
, loadLE(Ity_F32
, mkexpr(addr
)) );
10738 assign( f32_1
, loadLE(Ity_F32
,
10739 binop(Iop_Add64
,mkexpr(addr
),mkU64(4))) );
10740 assign( f32_2
, loadLE(Ity_F32
,
10741 binop(Iop_Add64
,mkexpr(addr
),mkU64(8))) );
10742 assign( f32_3
, loadLE(Ity_F32
,
10743 binop(Iop_Add64
,mkexpr(addr
),mkU64(12))) );
10745 DIP("vcvtps2pd %s,%s\n", dis_buf
, nameYMMReg(rG
));
10748 putYMMRegLane64F( rG
, 3, unop(Iop_F32toF64
, mkexpr(f32_3
)) );
10749 putYMMRegLane64F( rG
, 2, unop(Iop_F32toF64
, mkexpr(f32_2
)) );
10750 putYMMRegLane64F( rG
, 1, unop(Iop_F32toF64
, mkexpr(f32_1
)) );
10751 putYMMRegLane64F( rG
, 0, unop(Iop_F32toF64
, mkexpr(f32_0
)) );
10756 static Long
dis_CVTPD2PS_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10757 Long delta
, Bool isAvx
)
10759 IRTemp addr
= IRTemp_INVALID
;
10762 UChar modrm
= getUChar(delta
);
10763 UInt rG
= gregOfRexRM(pfx
,modrm
);
10764 IRTemp argV
= newTemp(Ity_V128
);
10765 IRTemp rmode
= newTemp(Ity_I32
);
10766 if (epartIsReg(modrm
)) {
10767 UInt rE
= eregOfRexRM(pfx
,modrm
);
10768 assign( argV
, getXMMReg(rE
) );
10770 DIP("%scvtpd2ps %s,%s\n", isAvx
? "v" : "",
10771 nameXMMReg(rE
), nameXMMReg(rG
));
10773 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10774 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10776 DIP("%scvtpd2ps %s,%s\n", isAvx
? "v" : "",
10777 dis_buf
, nameXMMReg(rG
) );
10780 assign( rmode
, get_sse_roundingmode() );
10781 IRTemp t0
= newTemp(Ity_F64
);
10782 IRTemp t1
= newTemp(Ity_F64
);
10783 assign( t0
, unop(Iop_ReinterpI64asF64
,
10784 unop(Iop_V128to64
, mkexpr(argV
))) );
10785 assign( t1
, unop(Iop_ReinterpI64asF64
,
10786 unop(Iop_V128HIto64
, mkexpr(argV
))) );
10788 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) )
10789 putXMMRegLane32( rG
, 3, mkU32(0) );
10790 putXMMRegLane32( rG
, 2, mkU32(0) );
10791 putXMMRegLane32F( rG
, 1, CVT(t1
) );
10792 putXMMRegLane32F( rG
, 0, CVT(t0
) );
10795 putYMMRegLane128( rG
, 1, mkV128(0) );
10801 static Long
dis_CVTxPS2DQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10802 Long delta
, Bool isAvx
, Bool r2zero
)
10804 IRTemp addr
= IRTemp_INVALID
;
10807 UChar modrm
= getUChar(delta
);
10808 IRTemp argV
= newTemp(Ity_V128
);
10809 IRTemp rmode
= newTemp(Ity_I32
);
10810 UInt rG
= gregOfRexRM(pfx
,modrm
);
10812 if (epartIsReg(modrm
)) {
10813 UInt rE
= eregOfRexRM(pfx
,modrm
);
10814 assign( argV
, getXMMReg(rE
) );
10816 DIP("%scvt%sps2dq %s,%s\n",
10817 isAvx
? "v" : "", r2zero
? "t" : "", nameXMMReg(rE
), nameXMMReg(rG
));
10819 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10820 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10822 DIP("%scvt%sps2dq %s,%s\n",
10823 isAvx
? "v" : "", r2zero
? "t" : "", dis_buf
, nameXMMReg(rG
) );
10826 assign( rmode
, r2zero
? mkU32((UInt
)Irrm_ZERO
)
10827 : get_sse_roundingmode() );
10828 putXMMReg( rG
, binop(Iop_F32toI32Sx4
, mkexpr(rmode
), mkexpr(argV
)) );
10830 putYMMRegLane128( rG
, 1, mkV128(0) );
10836 static Long
dis_CVTxPS2DQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10837 Long delta
, Bool r2zero
)
10839 IRTemp addr
= IRTemp_INVALID
;
10842 UChar modrm
= getUChar(delta
);
10843 IRTemp argV
= newTemp(Ity_V256
);
10844 IRTemp rmode
= newTemp(Ity_I32
);
10845 UInt rG
= gregOfRexRM(pfx
,modrm
);
10847 if (epartIsReg(modrm
)) {
10848 UInt rE
= eregOfRexRM(pfx
,modrm
);
10849 assign( argV
, getYMMReg(rE
) );
10851 DIP("vcvt%sps2dq %s,%s\n",
10852 r2zero
? "t" : "", nameYMMReg(rE
), nameYMMReg(rG
));
10854 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10855 assign( argV
, loadLE(Ity_V256
, mkexpr(addr
)) );
10857 DIP("vcvt%sps2dq %s,%s\n",
10858 r2zero
? "t" : "", dis_buf
, nameYMMReg(rG
) );
10861 assign( rmode
, r2zero
? mkU32((UInt
)Irrm_ZERO
)
10862 : get_sse_roundingmode() );
10863 putYMMReg( rG
, binop(Iop_F32toI32Sx8
, mkexpr(rmode
), mkexpr(argV
)) );
10868 static Long
dis_CVTxPD2DQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10869 Long delta
, Bool isAvx
, Bool r2zero
)
10871 IRTemp addr
= IRTemp_INVALID
;
10874 UChar modrm
= getUChar(delta
);
10875 IRTemp argV
= newTemp(Ity_V128
);
10876 IRTemp rmode
= newTemp(Ity_I32
);
10877 UInt rG
= gregOfRexRM(pfx
,modrm
);
10880 if (epartIsReg(modrm
)) {
10881 UInt rE
= eregOfRexRM(pfx
,modrm
);
10882 assign( argV
, getXMMReg(rE
) );
10884 DIP("%scvt%spd2dq %s,%s\n",
10885 isAvx
? "v" : "", r2zero
? "t" : "", nameXMMReg(rE
), nameXMMReg(rG
));
10887 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10888 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10890 DIP("%scvt%spd2dqx %s,%s\n",
10891 isAvx
? "v" : "", r2zero
? "t" : "", dis_buf
, nameXMMReg(rG
) );
10895 assign(rmode
, mkU32((UInt
)Irrm_ZERO
) );
10897 assign( rmode
, get_sse_roundingmode() );
10900 t0
= newTemp(Ity_F64
);
10901 t1
= newTemp(Ity_F64
);
10902 assign( t0
, unop(Iop_ReinterpI64asF64
,
10903 unop(Iop_V128to64
, mkexpr(argV
))) );
10904 assign( t1
, unop(Iop_ReinterpI64asF64
,
10905 unop(Iop_V128HIto64
, mkexpr(argV
))) );
10907 # define CVT(_t) binop( Iop_F64toI32S, \
10911 putXMMRegLane32( rG
, 3, mkU32(0) );
10912 putXMMRegLane32( rG
, 2, mkU32(0) );
10913 putXMMRegLane32( rG
, 1, CVT(t1
) );
10914 putXMMRegLane32( rG
, 0, CVT(t0
) );
10917 putYMMRegLane128( rG
, 1, mkV128(0) );
10923 static Long
dis_CVTxPD2DQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10924 Long delta
, Bool r2zero
)
10926 IRTemp addr
= IRTemp_INVALID
;
10929 UChar modrm
= getUChar(delta
);
10930 IRTemp argV
= newTemp(Ity_V256
);
10931 IRTemp rmode
= newTemp(Ity_I32
);
10932 UInt rG
= gregOfRexRM(pfx
,modrm
);
10933 IRTemp t0
, t1
, t2
, t3
;
10935 if (epartIsReg(modrm
)) {
10936 UInt rE
= eregOfRexRM(pfx
,modrm
);
10937 assign( argV
, getYMMReg(rE
) );
10939 DIP("vcvt%spd2dq %s,%s\n",
10940 r2zero
? "t" : "", nameYMMReg(rE
), nameXMMReg(rG
));
10942 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10943 assign( argV
, loadLE(Ity_V256
, mkexpr(addr
)) );
10945 DIP("vcvt%spd2dqy %s,%s\n",
10946 r2zero
? "t" : "", dis_buf
, nameXMMReg(rG
) );
10950 assign(rmode
, mkU32((UInt
)Irrm_ZERO
) );
10952 assign( rmode
, get_sse_roundingmode() );
10955 t0
= IRTemp_INVALID
;
10956 t1
= IRTemp_INVALID
;
10957 t2
= IRTemp_INVALID
;
10958 t3
= IRTemp_INVALID
;
10959 breakupV256to64s( argV
, &t3
, &t2
, &t1
, &t0
);
10961 # define CVT(_t) binop( Iop_F64toI32S, \
10963 unop( Iop_ReinterpI64asF64, \
10966 putXMMRegLane32( rG
, 3, CVT(t3
) );
10967 putXMMRegLane32( rG
, 2, CVT(t2
) );
10968 putXMMRegLane32( rG
, 1, CVT(t1
) );
10969 putXMMRegLane32( rG
, 0, CVT(t0
) );
10971 putYMMRegLane128( rG
, 1, mkV128(0) );
10977 static Long
dis_CVTDQ2PS_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
10978 Long delta
, Bool isAvx
)
10980 IRTemp addr
= IRTemp_INVALID
;
10983 UChar modrm
= getUChar(delta
);
10984 IRTemp argV
= newTemp(Ity_V128
);
10985 IRTemp rmode
= newTemp(Ity_I32
);
10986 UInt rG
= gregOfRexRM(pfx
,modrm
);
10988 if (epartIsReg(modrm
)) {
10989 UInt rE
= eregOfRexRM(pfx
,modrm
);
10990 assign( argV
, getXMMReg(rE
) );
10992 DIP("%scvtdq2ps %s,%s\n",
10993 isAvx
? "v" : "", nameXMMReg(rE
), nameXMMReg(rG
));
10995 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
10996 assign( argV
, loadLE(Ity_V128
, mkexpr(addr
)) );
10998 DIP("%scvtdq2ps %s,%s\n",
10999 isAvx
? "v" : "", dis_buf
, nameXMMReg(rG
) );
11002 assign( rmode
, get_sse_roundingmode() );
11003 putXMMReg(rG
, binop(Iop_I32StoF32x4
, mkexpr(rmode
), mkexpr(argV
)));
11006 putYMMRegLane128( rG
, 1, mkV128(0) );
11011 static Long
dis_CVTDQ2PS_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
11014 IRTemp addr
= IRTemp_INVALID
;
11017 UChar modrm
= getUChar(delta
);
11018 IRTemp argV
= newTemp(Ity_V256
);
11019 IRTemp rmode
= newTemp(Ity_I32
);
11020 UInt rG
= gregOfRexRM(pfx
,modrm
);
11022 if (epartIsReg(modrm
)) {
11023 UInt rE
= eregOfRexRM(pfx
,modrm
);
11024 assign( argV
, getYMMReg(rE
) );
11026 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
11028 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
11029 assign( argV
, loadLE(Ity_V256
, mkexpr(addr
)) );
11031 DIP("vcvtdq2ps %s,%s\n", dis_buf
, nameYMMReg(rG
) );
11034 assign( rmode
, get_sse_roundingmode() );
11035 putYMMReg(rG
, binop(Iop_I32StoF32x8
, mkexpr(rmode
), mkexpr(argV
)));
11041 static Long
dis_PMOVMSKB_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
11042 Long delta
, Bool isAvx
)
11044 UChar modrm
= getUChar(delta
);
11045 vassert(epartIsReg(modrm
)); /* ensured by caller */
11046 UInt rE
= eregOfRexRM(pfx
,modrm
);
11047 UInt rG
= gregOfRexRM(pfx
,modrm
);
11048 IRTemp t0
= newTemp(Ity_V128
);
11049 IRTemp t1
= newTemp(Ity_I32
);
11050 assign(t0
, getXMMReg(rE
));
11051 assign(t1
, unop(Iop_16Uto32
, unop(Iop_GetMSBs8x16
, mkexpr(t0
))));
11052 putIReg32(rG
, mkexpr(t1
));
11053 DIP("%spmovmskb %s,%s\n", isAvx
? "v" : "", nameXMMReg(rE
),
11060 static Long
dis_PMOVMSKB_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
11063 UChar modrm
= getUChar(delta
);
11064 vassert(epartIsReg(modrm
)); /* ensured by caller */
11065 UInt rE
= eregOfRexRM(pfx
,modrm
);
11066 UInt rG
= gregOfRexRM(pfx
,modrm
);
11067 IRTemp t0
= newTemp(Ity_V128
);
11068 IRTemp t1
= newTemp(Ity_V128
);
11069 IRTemp t2
= newTemp(Ity_I16
);
11070 IRTemp t3
= newTemp(Ity_I16
);
11071 assign(t0
, getYMMRegLane128(rE
, 0));
11072 assign(t1
, getYMMRegLane128(rE
, 1));
11073 assign(t2
, unop(Iop_GetMSBs8x16
, mkexpr(t0
)));
11074 assign(t3
, unop(Iop_GetMSBs8x16
, mkexpr(t1
)));
11075 putIReg32(rG
, binop(Iop_16HLto32
, mkexpr(t3
), mkexpr(t2
)));
11076 DIP("vpmovmskb %s,%s\n", nameYMMReg(rE
), nameIReg32(rG
));
11082 /* FIXME: why not just use InterleaveLO / InterleaveHI? I think the
11083 relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". */
11084 /* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */
11085 static IRTemp
math_UNPCKxPS_128 ( IRTemp sV
, IRTemp dV
, Bool xIsH
)
11087 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
11088 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
11089 breakupV128to32s( dV
, &d3
, &d2
, &d1
, &d0
);
11090 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
11091 IRTemp res
= newTemp(Ity_V128
);
11092 assign(res
, xIsH
? mkV128from32s( s3
, d3
, s2
, d2
)
11093 : mkV128from32s( s1
, d1
, s0
, d0
));
11098 /* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
11099 /* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */
11100 static IRTemp
math_UNPCKxPD_128 ( IRTemp sV
, IRTemp dV
, Bool xIsH
)
11102 IRTemp s1
= newTemp(Ity_I64
);
11103 IRTemp s0
= newTemp(Ity_I64
);
11104 IRTemp d1
= newTemp(Ity_I64
);
11105 IRTemp d0
= newTemp(Ity_I64
);
11106 assign( d1
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
11107 assign( d0
, unop(Iop_V128to64
, mkexpr(dV
)) );
11108 assign( s1
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
11109 assign( s0
, unop(Iop_V128to64
, mkexpr(sV
)) );
11110 IRTemp res
= newTemp(Ity_V128
);
11111 assign(res
, xIsH
? binop(Iop_64HLtoV128
, mkexpr(s1
), mkexpr(d1
))
11112 : binop(Iop_64HLtoV128
, mkexpr(s0
), mkexpr(d0
)));
11117 /* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
11118 Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
11119 or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
11121 static IRTemp
math_UNPCKxPD_256 ( IRTemp sV
, IRTemp dV
, Bool xIsH
)
11123 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
11124 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
11125 breakupV256to64s( dV
, &d3
, &d2
, &d1
, &d0
);
11126 breakupV256to64s( sV
, &s3
, &s2
, &s1
, &s0
);
11127 IRTemp res
= newTemp(Ity_V256
);
11129 ? IRExpr_Qop(Iop_64x4toV256
, mkexpr(s3
), mkexpr(d3
),
11130 mkexpr(s1
), mkexpr(d1
))
11131 : IRExpr_Qop(Iop_64x4toV256
, mkexpr(s2
), mkexpr(d2
),
11132 mkexpr(s0
), mkexpr(d0
)));
11137 /* FIXME: this is really bad. Surely can do something better here?
11138 One observation is that the steering in the upper and lower 128 bit
11139 halves is the same as with math_UNPCKxPS_128, so we simply split
11140 into two halves, and use that. Consequently any improvement in
11141 math_UNPCKxPS_128 (probably, to use interleave-style primops)
11142 benefits this too. */
11143 static IRTemp
math_UNPCKxPS_256 ( IRTemp sV
, IRTemp dV
, Bool xIsH
)
11145 IRTemp sVhi
= IRTemp_INVALID
, sVlo
= IRTemp_INVALID
;
11146 IRTemp dVhi
= IRTemp_INVALID
, dVlo
= IRTemp_INVALID
;
11147 breakupV256toV128s( sV
, &sVhi
, &sVlo
);
11148 breakupV256toV128s( dV
, &dVhi
, &dVlo
);
11149 IRTemp rVhi
= math_UNPCKxPS_128(sVhi
, dVhi
, xIsH
);
11150 IRTemp rVlo
= math_UNPCKxPS_128(sVlo
, dVlo
, xIsH
);
11151 IRTemp rV
= newTemp(Ity_V256
);
11152 assign(rV
, binop(Iop_V128HLtoV256
, mkexpr(rVhi
), mkexpr(rVlo
)));
11157 static IRTemp
math_SHUFPS_128 ( IRTemp sV
, IRTemp dV
, UInt imm8
)
11159 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
11160 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
11161 vassert(imm8
< 256);
11163 breakupV128to32s( dV
, &d3
, &d2
, &d1
, &d0
);
11164 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
11166 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
11167 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11168 IRTemp res
= newTemp(Ity_V128
);
11170 mkV128from32s( SELS((imm8
>>6)&3), SELS((imm8
>>4)&3),
11171 SELD((imm8
>>2)&3), SELD((imm8
>>0)&3) ) );
11178 /* 256-bit SHUFPS appears to steer each of the 128-bit halves
11179 identically. Hence do the clueless thing and use math_SHUFPS_128
11181 static IRTemp
math_SHUFPS_256 ( IRTemp sV
, IRTemp dV
, UInt imm8
)
11183 IRTemp sVhi
= IRTemp_INVALID
, sVlo
= IRTemp_INVALID
;
11184 IRTemp dVhi
= IRTemp_INVALID
, dVlo
= IRTemp_INVALID
;
11185 breakupV256toV128s( sV
, &sVhi
, &sVlo
);
11186 breakupV256toV128s( dV
, &dVhi
, &dVlo
);
11187 IRTemp rVhi
= math_SHUFPS_128(sVhi
, dVhi
, imm8
);
11188 IRTemp rVlo
= math_SHUFPS_128(sVlo
, dVlo
, imm8
);
11189 IRTemp rV
= newTemp(Ity_V256
);
11190 assign(rV
, binop(Iop_V128HLtoV256
, mkexpr(rVhi
), mkexpr(rVlo
)));
11195 static IRTemp
math_SHUFPD_128 ( IRTemp sV
, IRTemp dV
, UInt imm8
)
11197 IRTemp s1
= newTemp(Ity_I64
);
11198 IRTemp s0
= newTemp(Ity_I64
);
11199 IRTemp d1
= newTemp(Ity_I64
);
11200 IRTemp d0
= newTemp(Ity_I64
);
11202 assign( d1
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
11203 assign( d0
, unop(Iop_V128to64
, mkexpr(dV
)) );
11204 assign( s1
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
11205 assign( s0
, unop(Iop_V128to64
, mkexpr(sV
)) );
11207 # define SELD(n) mkexpr((n)==0 ? d0 : d1)
11208 # define SELS(n) mkexpr((n)==0 ? s0 : s1)
11210 IRTemp res
= newTemp(Ity_V128
);
11211 assign(res
, binop( Iop_64HLtoV128
,
11212 SELS((imm8
>>1)&1), SELD((imm8
>>0)&1) ) );
11220 static IRTemp
math_SHUFPD_256 ( IRTemp sV
, IRTemp dV
, UInt imm8
)
11222 IRTemp sVhi
= IRTemp_INVALID
, sVlo
= IRTemp_INVALID
;
11223 IRTemp dVhi
= IRTemp_INVALID
, dVlo
= IRTemp_INVALID
;
11224 breakupV256toV128s( sV
, &sVhi
, &sVlo
);
11225 breakupV256toV128s( dV
, &dVhi
, &dVlo
);
11226 IRTemp rVhi
= math_SHUFPD_128(sVhi
, dVhi
, (imm8
>> 2) & 3);
11227 IRTemp rVlo
= math_SHUFPD_128(sVlo
, dVlo
, imm8
& 3);
11228 IRTemp rV
= newTemp(Ity_V256
);
11229 assign(rV
, binop(Iop_V128HLtoV256
, mkexpr(rVhi
), mkexpr(rVlo
)));
11234 static IRTemp
math_BLENDPD_128 ( IRTemp sV
, IRTemp dV
, UInt imm8
)
11236 UShort imm8_mask_16
;
11237 IRTemp imm8_mask
= newTemp(Ity_V128
);
11239 switch( imm8
& 3 ) {
11240 case 0: imm8_mask_16
= 0x0000; break;
11241 case 1: imm8_mask_16
= 0x00FF; break;
11242 case 2: imm8_mask_16
= 0xFF00; break;
11243 case 3: imm8_mask_16
= 0xFFFF; break;
11244 default: vassert(0); break;
11246 assign( imm8_mask
, mkV128( imm8_mask_16
) );
11248 IRTemp res
= newTemp(Ity_V128
);
11249 assign ( res
, binop( Iop_OrV128
,
11250 binop( Iop_AndV128
, mkexpr(sV
),
11251 mkexpr(imm8_mask
) ),
11252 binop( Iop_AndV128
, mkexpr(dV
),
11253 unop( Iop_NotV128
, mkexpr(imm8_mask
) ) ) ) );
11258 static IRTemp
math_BLENDPD_256 ( IRTemp sV
, IRTemp dV
, UInt imm8
)
11260 IRTemp sVhi
= IRTemp_INVALID
, sVlo
= IRTemp_INVALID
;
11261 IRTemp dVhi
= IRTemp_INVALID
, dVlo
= IRTemp_INVALID
;
11262 breakupV256toV128s( sV
, &sVhi
, &sVlo
);
11263 breakupV256toV128s( dV
, &dVhi
, &dVlo
);
11264 IRTemp rVhi
= math_BLENDPD_128(sVhi
, dVhi
, (imm8
>> 2) & 3);
11265 IRTemp rVlo
= math_BLENDPD_128(sVlo
, dVlo
, imm8
& 3);
11266 IRTemp rV
= newTemp(Ity_V256
);
11267 assign(rV
, binop(Iop_V128HLtoV256
, mkexpr(rVhi
), mkexpr(rVlo
)));
11272 static IRTemp
math_BLENDPS_128 ( IRTemp sV
, IRTemp dV
, UInt imm8
)
11274 UShort imm8_perms
[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
11275 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
11276 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
11278 IRTemp imm8_mask
= newTemp(Ity_V128
);
11279 assign( imm8_mask
, mkV128( imm8_perms
[ (imm8
& 15) ] ) );
11281 IRTemp res
= newTemp(Ity_V128
);
11282 assign ( res
, binop( Iop_OrV128
,
11283 binop( Iop_AndV128
, mkexpr(sV
),
11284 mkexpr(imm8_mask
) ),
11285 binop( Iop_AndV128
, mkexpr(dV
),
11286 unop( Iop_NotV128
, mkexpr(imm8_mask
) ) ) ) );
11291 static IRTemp
math_BLENDPS_256 ( IRTemp sV
, IRTemp dV
, UInt imm8
)
11293 IRTemp sVhi
= IRTemp_INVALID
, sVlo
= IRTemp_INVALID
;
11294 IRTemp dVhi
= IRTemp_INVALID
, dVlo
= IRTemp_INVALID
;
11295 breakupV256toV128s( sV
, &sVhi
, &sVlo
);
11296 breakupV256toV128s( dV
, &dVhi
, &dVlo
);
11297 IRTemp rVhi
= math_BLENDPS_128(sVhi
, dVhi
, (imm8
>> 4) & 15);
11298 IRTemp rVlo
= math_BLENDPS_128(sVlo
, dVlo
, imm8
& 15);
11299 IRTemp rV
= newTemp(Ity_V256
);
11300 assign(rV
, binop(Iop_V128HLtoV256
, mkexpr(rVhi
), mkexpr(rVlo
)));
11305 static IRTemp
math_PBLENDW_128 ( IRTemp sV
, IRTemp dV
, UInt imm8
)
11307 /* Make w be a 16-bit version of imm8, formed by duplicating each
11311 for (i
= 0; i
< 8; i
++) {
11312 if (imm8
& (1 << i
))
11313 imm16
|= (3 << (2*i
));
11315 IRTemp imm16_mask
= newTemp(Ity_V128
);
11316 assign( imm16_mask
, mkV128( imm16
));
11318 IRTemp res
= newTemp(Ity_V128
);
11319 assign ( res
, binop( Iop_OrV128
,
11320 binop( Iop_AndV128
, mkexpr(sV
),
11321 mkexpr(imm16_mask
) ),
11322 binop( Iop_AndV128
, mkexpr(dV
),
11323 unop( Iop_NotV128
, mkexpr(imm16_mask
) ) ) ) );
11328 static IRTemp
math_PMULUDQ_128 ( IRTemp sV
, IRTemp dV
)
11330 /* This is a really poor translation -- could be improved if
11331 performance critical */
11332 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
11333 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
11334 breakupV128to32s( dV
, &d3
, &d2
, &d1
, &d0
);
11335 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
11336 IRTemp res
= newTemp(Ity_V128
);
11337 assign(res
, binop(Iop_64HLtoV128
,
11338 binop( Iop_MullU32
, mkexpr(d2
), mkexpr(s2
)),
11339 binop( Iop_MullU32
, mkexpr(d0
), mkexpr(s0
)) ));
11344 static IRTemp
math_PMULUDQ_256 ( IRTemp sV
, IRTemp dV
)
11346 /* This is a really poor translation -- could be improved if
11347 performance critical */
11348 IRTemp sHi
, sLo
, dHi
, dLo
;
11349 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
11350 breakupV256toV128s( dV
, &dHi
, &dLo
);
11351 breakupV256toV128s( sV
, &sHi
, &sLo
);
11352 IRTemp res
= newTemp(Ity_V256
);
11353 assign(res
, binop(Iop_V128HLtoV256
,
11354 mkexpr(math_PMULUDQ_128(sHi
, dHi
)),
11355 mkexpr(math_PMULUDQ_128(sLo
, dLo
))));
11360 static IRTemp
math_PMULDQ_128 ( IRTemp dV
, IRTemp sV
)
11362 /* This is a really poor translation -- could be improved if
11363 performance critical */
11364 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
11365 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
11366 breakupV128to32s( dV
, &d3
, &d2
, &d1
, &d0
);
11367 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
11368 IRTemp res
= newTemp(Ity_V128
);
11369 assign(res
, binop(Iop_64HLtoV128
,
11370 binop( Iop_MullS32
, mkexpr(d2
), mkexpr(s2
)),
11371 binop( Iop_MullS32
, mkexpr(d0
), mkexpr(s0
)) ));
11376 static IRTemp
math_PMULDQ_256 ( IRTemp sV
, IRTemp dV
)
11378 /* This is a really poor translation -- could be improved if
11379 performance critical */
11380 IRTemp sHi
, sLo
, dHi
, dLo
;
11381 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
11382 breakupV256toV128s( dV
, &dHi
, &dLo
);
11383 breakupV256toV128s( sV
, &sHi
, &sLo
);
11384 IRTemp res
= newTemp(Ity_V256
);
11385 assign(res
, binop(Iop_V128HLtoV256
,
11386 mkexpr(math_PMULDQ_128(sHi
, dHi
)),
11387 mkexpr(math_PMULDQ_128(sLo
, dLo
))));
11392 static IRTemp
math_PMADDWD_128 ( IRTemp dV
, IRTemp sV
)
11394 IRTemp sVhi
, sVlo
, dVhi
, dVlo
;
11395 IRTemp resHi
= newTemp(Ity_I64
);
11396 IRTemp resLo
= newTemp(Ity_I64
);
11397 sVhi
= sVlo
= dVhi
= dVlo
= IRTemp_INVALID
;
11398 breakupV128to64s( sV
, &sVhi
, &sVlo
);
11399 breakupV128to64s( dV
, &dVhi
, &dVlo
);
11400 assign( resHi
, mkIRExprCCall(Ity_I64
, 0/*regparms*/,
11401 "amd64g_calculate_mmx_pmaddwd",
11402 &amd64g_calculate_mmx_pmaddwd
,
11403 mkIRExprVec_2( mkexpr(sVhi
), mkexpr(dVhi
))));
11404 assign( resLo
, mkIRExprCCall(Ity_I64
, 0/*regparms*/,
11405 "amd64g_calculate_mmx_pmaddwd",
11406 &amd64g_calculate_mmx_pmaddwd
,
11407 mkIRExprVec_2( mkexpr(sVlo
), mkexpr(dVlo
))));
11408 IRTemp res
= newTemp(Ity_V128
);
11409 assign( res
, binop(Iop_64HLtoV128
, mkexpr(resHi
), mkexpr(resLo
))) ;
11414 static IRTemp
math_PMADDWD_256 ( IRTemp dV
, IRTemp sV
)
11416 IRTemp sHi
, sLo
, dHi
, dLo
;
11417 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
11418 breakupV256toV128s( dV
, &dHi
, &dLo
);
11419 breakupV256toV128s( sV
, &sHi
, &sLo
);
11420 IRTemp res
= newTemp(Ity_V256
);
11421 assign(res
, binop(Iop_V128HLtoV256
,
11422 mkexpr(math_PMADDWD_128(dHi
, sHi
)),
11423 mkexpr(math_PMADDWD_128(dLo
, sLo
))));
11428 static IRTemp
math_ADDSUBPD_128 ( IRTemp dV
, IRTemp sV
)
11430 IRTemp addV
= newTemp(Ity_V128
);
11431 IRTemp subV
= newTemp(Ity_V128
);
11432 IRTemp a1
= newTemp(Ity_I64
);
11433 IRTemp s0
= newTemp(Ity_I64
);
11434 IRTemp rm
= newTemp(Ity_I32
);
11436 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11437 assign( addV
, triop(Iop_Add64Fx2
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11438 assign( subV
, triop(Iop_Sub64Fx2
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11440 assign( a1
, unop(Iop_V128HIto64
, mkexpr(addV
) ));
11441 assign( s0
, unop(Iop_V128to64
, mkexpr(subV
) ));
11443 IRTemp res
= newTemp(Ity_V128
);
11444 assign( res
, binop(Iop_64HLtoV128
, mkexpr(a1
), mkexpr(s0
)) );
11449 static IRTemp
math_ADDSUBPD_256 ( IRTemp dV
, IRTemp sV
)
11451 IRTemp a3
, a2
, a1
, a0
, s3
, s2
, s1
, s0
;
11452 IRTemp addV
= newTemp(Ity_V256
);
11453 IRTemp subV
= newTemp(Ity_V256
);
11454 IRTemp rm
= newTemp(Ity_I32
);
11455 a3
= a2
= a1
= a0
= s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11457 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11458 assign( addV
, triop(Iop_Add64Fx4
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11459 assign( subV
, triop(Iop_Sub64Fx4
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11461 breakupV256to64s( addV
, &a3
, &a2
, &a1
, &a0
);
11462 breakupV256to64s( subV
, &s3
, &s2
, &s1
, &s0
);
11464 IRTemp res
= newTemp(Ity_V256
);
11465 assign( res
, mkV256from64s( a3
, s2
, a1
, s0
) );
11470 static IRTemp
math_ADDSUBPS_128 ( IRTemp dV
, IRTemp sV
)
11472 IRTemp a3
, a2
, a1
, a0
, s3
, s2
, s1
, s0
;
11473 IRTemp addV
= newTemp(Ity_V128
);
11474 IRTemp subV
= newTemp(Ity_V128
);
11475 IRTemp rm
= newTemp(Ity_I32
);
11476 a3
= a2
= a1
= a0
= s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11478 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11479 assign( addV
, triop(Iop_Add32Fx4
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11480 assign( subV
, triop(Iop_Sub32Fx4
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11482 breakupV128to32s( addV
, &a3
, &a2
, &a1
, &a0
);
11483 breakupV128to32s( subV
, &s3
, &s2
, &s1
, &s0
);
11485 IRTemp res
= newTemp(Ity_V128
);
11486 assign( res
, mkV128from32s( a3
, s2
, a1
, s0
) );
11491 static IRTemp
math_ADDSUBPS_256 ( IRTemp dV
, IRTemp sV
)
11493 IRTemp a7
, a6
, a5
, a4
, a3
, a2
, a1
, a0
;
11494 IRTemp s7
, s6
, s5
, s4
, s3
, s2
, s1
, s0
;
11495 IRTemp addV
= newTemp(Ity_V256
);
11496 IRTemp subV
= newTemp(Ity_V256
);
11497 IRTemp rm
= newTemp(Ity_I32
);
11498 a7
= a6
= a5
= a4
= a3
= a2
= a1
= a0
= IRTemp_INVALID
;
11499 s7
= s6
= s5
= s4
= s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11501 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11502 assign( addV
, triop(Iop_Add32Fx8
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11503 assign( subV
, triop(Iop_Sub32Fx8
, mkexpr(rm
), mkexpr(dV
), mkexpr(sV
)) );
11505 breakupV256to32s( addV
, &a7
, &a6
, &a5
, &a4
, &a3
, &a2
, &a1
, &a0
);
11506 breakupV256to32s( subV
, &s7
, &s6
, &s5
, &s4
, &s3
, &s2
, &s1
, &s0
);
11508 IRTemp res
= newTemp(Ity_V256
);
11509 assign( res
, mkV256from32s( a7
, s6
, a5
, s4
, a3
, s2
, a1
, s0
) );
11514 /* Handle 128 bit PSHUFLW and PSHUFHW. */
11515 static Long
dis_PSHUFxW_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
11516 Long delta
, Bool isAvx
, Bool xIsH
)
11518 IRTemp addr
= IRTemp_INVALID
;
11521 UChar modrm
= getUChar(delta
);
11522 UInt rG
= gregOfRexRM(pfx
,modrm
);
11524 IRTemp sVmut
, dVmut
, sVcon
, sV
, dV
, s3
, s2
, s1
, s0
;
11525 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11526 sV
= newTemp(Ity_V128
);
11527 dV
= newTemp(Ity_V128
);
11528 sVmut
= newTemp(Ity_I64
);
11529 dVmut
= newTemp(Ity_I64
);
11530 sVcon
= newTemp(Ity_I64
);
11531 if (epartIsReg(modrm
)) {
11532 UInt rE
= eregOfRexRM(pfx
,modrm
);
11533 assign( sV
, getXMMReg(rE
) );
11534 imm8
= (UInt
)getUChar(delta
+1);
11536 DIP("%spshuf%cw $%u,%s,%s\n",
11537 isAvx
? "v" : "", xIsH
? 'h' : 'l',
11538 imm8
, nameXMMReg(rE
), nameXMMReg(rG
));
11540 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
11541 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
11542 imm8
= (UInt
)getUChar(delta
+alen
);
11544 DIP("%spshuf%cw $%u,%s,%s\n",
11545 isAvx
? "v" : "", xIsH
? 'h' : 'l',
11546 imm8
, dis_buf
, nameXMMReg(rG
));
11549 /* Get the to-be-changed (mut) and unchanging (con) bits of the
11551 assign( sVmut
, unop(xIsH
? Iop_V128HIto64
: Iop_V128to64
, mkexpr(sV
)) );
11552 assign( sVcon
, unop(xIsH
? Iop_V128to64
: Iop_V128HIto64
, mkexpr(sV
)) );
11554 breakup64to16s( sVmut
, &s3
, &s2
, &s1
, &s0
);
11556 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11557 assign(dVmut
, mk64from16s( SEL((imm8
>>6)&3), SEL((imm8
>>4)&3),
11558 SEL((imm8
>>2)&3), SEL((imm8
>>0)&3) ));
11561 assign(dV
, xIsH
? binop(Iop_64HLtoV128
, mkexpr(dVmut
), mkexpr(sVcon
))
11562 : binop(Iop_64HLtoV128
, mkexpr(sVcon
), mkexpr(dVmut
)) );
11564 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)(rG
, mkexpr(dV
));
11569 /* Handle 256 bit PSHUFLW and PSHUFHW. */
11570 static Long
dis_PSHUFxW_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
11571 Long delta
, Bool xIsH
)
11573 IRTemp addr
= IRTemp_INVALID
;
11576 UChar modrm
= getUChar(delta
);
11577 UInt rG
= gregOfRexRM(pfx
,modrm
);
11579 IRTemp sV
, s
[8], sV64
[4], dVhi
, dVlo
;
11580 sV64
[3] = sV64
[2] = sV64
[1] = sV64
[0] = IRTemp_INVALID
;
11581 s
[7] = s
[6] = s
[5] = s
[4] = s
[3] = s
[2] = s
[1] = s
[0] = IRTemp_INVALID
;
11582 sV
= newTemp(Ity_V256
);
11583 dVhi
= newTemp(Ity_I64
);
11584 dVlo
= newTemp(Ity_I64
);
11585 if (epartIsReg(modrm
)) {
11586 UInt rE
= eregOfRexRM(pfx
,modrm
);
11587 assign( sV
, getYMMReg(rE
) );
11588 imm8
= (UInt
)getUChar(delta
+1);
11590 DIP("vpshuf%cw $%u,%s,%s\n", xIsH
? 'h' : 'l',
11591 imm8
, nameYMMReg(rE
), nameYMMReg(rG
));
11593 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
11594 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
11595 imm8
= (UInt
)getUChar(delta
+alen
);
11597 DIP("vpshuf%cw $%u,%s,%s\n", xIsH
? 'h' : 'l',
11598 imm8
, dis_buf
, nameYMMReg(rG
));
11601 breakupV256to64s( sV
, &sV64
[3], &sV64
[2], &sV64
[1], &sV64
[0] );
11602 breakup64to16s( sV64
[xIsH
? 3 : 2], &s
[7], &s
[6], &s
[5], &s
[4] );
11603 breakup64to16s( sV64
[xIsH
? 1 : 0], &s
[3], &s
[2], &s
[1], &s
[0] );
11605 assign( dVhi
, mk64from16s( s
[4 + ((imm8
>>6)&3)], s
[4 + ((imm8
>>4)&3)],
11606 s
[4 + ((imm8
>>2)&3)], s
[4 + ((imm8
>>0)&3)] ) );
11607 assign( dVlo
, mk64from16s( s
[0 + ((imm8
>>6)&3)], s
[0 + ((imm8
>>4)&3)],
11608 s
[0 + ((imm8
>>2)&3)], s
[0 + ((imm8
>>0)&3)] ) );
11609 putYMMReg( rG
, mkV256from64s( xIsH
? dVhi
: sV64
[3],
11610 xIsH
? sV64
[2] : dVhi
,
11611 xIsH
? dVlo
: sV64
[1],
11612 xIsH
? sV64
[0] : dVlo
) );
11617 static Long
dis_PEXTRW_128_EregOnly_toG ( const VexAbiInfo
* vbi
, Prefix pfx
,
11618 Long delta
, Bool isAvx
)
11620 Long deltaIN
= delta
;
11621 UChar modrm
= getUChar(delta
);
11622 UInt rG
= gregOfRexRM(pfx
,modrm
);
11623 IRTemp sV
= newTemp(Ity_V128
);
11624 IRTemp d16
= newTemp(Ity_I16
);
11626 IRTemp s0
, s1
, s2
, s3
;
11627 if (epartIsReg(modrm
)) {
11628 UInt rE
= eregOfRexRM(pfx
,modrm
);
11629 assign(sV
, getXMMReg(rE
));
11630 imm8
= getUChar(delta
+1) & 7;
11632 DIP("%spextrw $%u,%s,%s\n", isAvx
? "v" : "",
11633 imm8
, nameXMMReg(rE
), nameIReg32(rG
));
11635 /* The memory case is disallowed, apparently. */
11636 return deltaIN
; /* FAIL */
11638 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
11639 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
11641 case 0: assign(d16
, unop(Iop_32to16
, mkexpr(s0
))); break;
11642 case 1: assign(d16
, unop(Iop_32HIto16
, mkexpr(s0
))); break;
11643 case 2: assign(d16
, unop(Iop_32to16
, mkexpr(s1
))); break;
11644 case 3: assign(d16
, unop(Iop_32HIto16
, mkexpr(s1
))); break;
11645 case 4: assign(d16
, unop(Iop_32to16
, mkexpr(s2
))); break;
11646 case 5: assign(d16
, unop(Iop_32HIto16
, mkexpr(s2
))); break;
11647 case 6: assign(d16
, unop(Iop_32to16
, mkexpr(s3
))); break;
11648 case 7: assign(d16
, unop(Iop_32HIto16
, mkexpr(s3
))); break;
11649 default: vassert(0);
11651 putIReg32(rG
, unop(Iop_16Uto32
, mkexpr(d16
)));
11656 static Long
dis_CVTDQ2PD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
11657 Long delta
, Bool isAvx
)
11659 IRTemp addr
= IRTemp_INVALID
;
11662 UChar modrm
= getUChar(delta
);
11663 IRTemp arg64
= newTemp(Ity_I64
);
11664 UInt rG
= gregOfRexRM(pfx
,modrm
);
11665 const HChar
* mbV
= isAvx
? "v" : "";
11666 if (epartIsReg(modrm
)) {
11667 UInt rE
= eregOfRexRM(pfx
,modrm
);
11668 assign( arg64
, getXMMRegLane64(rE
, 0) );
11670 DIP("%scvtdq2pd %s,%s\n", mbV
, nameXMMReg(rE
), nameXMMReg(rG
));
11672 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
11673 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
11675 DIP("%scvtdq2pd %s,%s\n", mbV
, dis_buf
, nameXMMReg(rG
) );
11679 unop(Iop_I32StoF64
, unop(Iop_64to32
, mkexpr(arg64
)))
11683 unop(Iop_I32StoF64
, unop(Iop_64HIto32
, mkexpr(arg64
)))
11686 putYMMRegLane128(rG
, 1, mkV128(0));
11691 static Long
dis_STMXCSR ( const VexAbiInfo
* vbi
, Prefix pfx
,
11692 Long delta
, Bool isAvx
)
11694 IRTemp addr
= IRTemp_INVALID
;
11697 UChar modrm
= getUChar(delta
);
11698 vassert(!epartIsReg(modrm
)); /* ensured by caller */
11699 vassert(gregOfRexRM(pfx
,modrm
) == 3); /* ditto */
11701 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
11704 /* Fake up a native SSE mxcsr word. The only thing it depends on
11705 is SSEROUND[1:0], so call a clean helper to cook it up.
11707 /* ULong amd64h_create_mxcsr ( ULong sseround ) */
11708 DIP("%sstmxcsr %s\n", isAvx
? "v" : "", dis_buf
);
11713 Ity_I64
, 0/*regp*/,
11714 "amd64g_create_mxcsr", &amd64g_create_mxcsr
,
11715 mkIRExprVec_1( unop(Iop_32Uto64
,get_sse_roundingmode()) )
11723 static Long
dis_LDMXCSR ( const VexAbiInfo
* vbi
, Prefix pfx
,
11724 Long delta
, Bool isAvx
)
11726 IRTemp addr
= IRTemp_INVALID
;
11729 UChar modrm
= getUChar(delta
);
11730 vassert(!epartIsReg(modrm
)); /* ensured by caller */
11731 vassert(gregOfRexRM(pfx
,modrm
) == 2); /* ditto */
11733 IRTemp t64
= newTemp(Ity_I64
);
11734 IRTemp ew
= newTemp(Ity_I32
);
11736 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
11738 DIP("%sldmxcsr %s\n", isAvx
? "v" : "", dis_buf
);
11740 /* The only thing we observe in %mxcsr is the rounding mode.
11741 Therefore, pass the 32-bit value (SSE native-format control
11742 word) to a clean helper, getting back a 64-bit value, the
11743 lower half of which is the SSEROUND value to store, and the
11744 upper half of which is the emulation-warning token which may
11747 /* ULong amd64h_check_ldmxcsr ( ULong ); */
11748 assign( t64
, mkIRExprCCall(
11749 Ity_I64
, 0/*regparms*/,
11750 "amd64g_check_ldmxcsr",
11751 &amd64g_check_ldmxcsr
,
11754 loadLE(Ity_I32
, mkexpr(addr
))
11760 put_sse_roundingmode( unop(Iop_64to32
, mkexpr(t64
)) );
11761 assign( ew
, unop(Iop_64HIto32
, mkexpr(t64
) ) );
11762 put_emwarn( mkexpr(ew
) );
11763 /* Finally, if an emulation warning was reported, side-exit to
11764 the next insn, reporting the warning, so that Valgrind's
11765 dispatcher sees the warning. */
11768 binop(Iop_CmpNE64
, unop(Iop_32Uto64
,mkexpr(ew
)), mkU64(0)),
11770 IRConst_U64(guest_RIP_bbstart
+delta
),
11778 static void gen_XSAVE_SEQUENCE ( IRTemp addr
, IRTemp rfbm
)
11780 /* ------ rfbm[0] gates the x87 state ------ */
11782 /* Uses dirty helper:
11783 void amd64g_do_XSAVE_COMPONENT_0 ( VexGuestAMD64State*, ULong )
11785 IRDirty
* d0
= unsafeIRDirty_0_N (
11787 "amd64g_dirtyhelper_XSAVE_COMPONENT_0",
11788 &amd64g_dirtyhelper_XSAVE_COMPONENT_0
,
11789 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
11791 d0
->guard
= binop(Iop_CmpEQ64
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(1)),
11794 /* Declare we're writing memory. Really, bytes 24 through 31
11795 (MXCSR and MXCSR_MASK) aren't written, but we can't express more
11796 than 1 memory area here, so just mark the whole thing as
11798 d0
->mFx
= Ifx_Write
;
11799 d0
->mAddr
= mkexpr(addr
);
11802 /* declare we're reading guest state */
11804 vex_bzero(&d0
->fxState
, sizeof(d0
->fxState
));
11806 d0
->fxState
[0].fx
= Ifx_Read
;
11807 d0
->fxState
[0].offset
= OFFB_FTOP
;
11808 d0
->fxState
[0].size
= sizeof(UInt
);
11810 d0
->fxState
[1].fx
= Ifx_Read
;
11811 d0
->fxState
[1].offset
= OFFB_FPREGS
;
11812 d0
->fxState
[1].size
= 8 * sizeof(ULong
);
11814 d0
->fxState
[2].fx
= Ifx_Read
;
11815 d0
->fxState
[2].offset
= OFFB_FPTAGS
;
11816 d0
->fxState
[2].size
= 8 * sizeof(UChar
);
11818 d0
->fxState
[3].fx
= Ifx_Read
;
11819 d0
->fxState
[3].offset
= OFFB_FPROUND
;
11820 d0
->fxState
[3].size
= sizeof(ULong
);
11822 d0
->fxState
[4].fx
= Ifx_Read
;
11823 d0
->fxState
[4].offset
= OFFB_FC3210
;
11824 d0
->fxState
[4].size
= sizeof(ULong
);
11826 stmt( IRStmt_Dirty(d0
) );
11828 /* ------ rfbm[1] gates the SSE state ------ */
11830 IRTemp rfbm_1
= newTemp(Ity_I64
);
11831 IRTemp rfbm_1or2
= newTemp(Ity_I64
);
11832 assign(rfbm_1
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(2)));
11833 assign(rfbm_1or2
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(6)));
11835 IRExpr
* guard_1
= binop(Iop_CmpEQ64
, mkexpr(rfbm_1
), mkU64(2));
11836 IRExpr
* guard_1or2
= binop(Iop_CmpNE64
, mkexpr(rfbm_1or2
), mkU64(0));
11838 /* Uses dirty helper:
11839 void amd64g_do_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
11840 ( VexGuestAMD64State*, ULong )
11841 This creates only MXCSR and MXCSR_MASK. We need to do this if
11842 either components 1 (SSE) or 2 (AVX) are requested. Hence the
11843 guard condition is a bit more complex.
11845 IRDirty
* d1
= unsafeIRDirty_0_N (
11847 "amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS",
11848 &amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
,
11849 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
11851 d1
->guard
= guard_1or2
;
11853 /* Declare we're writing memory: MXCSR and MXCSR_MASK. Note that
11854 the code for rbfm[0] just above claims a write of 0 .. 159, so
11855 this duplicates it. But at least correctly connects 24 .. 31 to
11856 the MXCSR guest state representation (SSEROUND field). */
11857 d1
->mFx
= Ifx_Write
;
11858 d1
->mAddr
= binop(Iop_Add64
, mkexpr(addr
), mkU64(24));
11861 /* declare we're reading guest state */
11863 vex_bzero(&d1
->fxState
, sizeof(d1
->fxState
));
11865 d1
->fxState
[0].fx
= Ifx_Read
;
11866 d1
->fxState
[0].offset
= OFFB_SSEROUND
;
11867 d1
->fxState
[0].size
= sizeof(ULong
);
11869 /* Call the helper. This creates MXCSR and MXCSR_MASK but nothing
11870 else. We do the actual register array, XMM[0..15], separately,
11871 in order that any undefinedness in the XMM registers is tracked
11872 separately by Memcheck and does not "infect" the in-memory
11873 shadow for the other parts of the image. */
11874 stmt( IRStmt_Dirty(d1
) );
11876 /* And now the XMMs themselves. */
11878 for (reg
= 0; reg
< 16; reg
++) {
11879 stmt( IRStmt_StoreG(
11881 binop(Iop_Add64
, mkexpr(addr
), mkU64(160 + reg
* 16)),
11887 /* ------ rfbm[2] gates the AVX state ------ */
11888 /* Component 2 is just a bunch of register saves, so we'll do it
11889 inline, just to be simple and to be Memcheck friendly. */
11891 IRTemp rfbm_2
= newTemp(Ity_I64
);
11892 assign(rfbm_2
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(4)));
11894 IRExpr
* guard_2
= binop(Iop_CmpEQ64
, mkexpr(rfbm_2
), mkU64(4));
11896 for (reg
= 0; reg
< 16; reg
++) {
11897 stmt( IRStmt_StoreG(
11899 binop(Iop_Add64
, mkexpr(addr
), mkU64(576 + reg
* 16)),
11900 getYMMRegLane128(reg
,1),
11907 static Long
dis_XSAVE ( const VexAbiInfo
* vbi
,
11908 Prefix pfx
, Long delta
, Int sz
)
11910 /* Note that the presence or absence of REX.W (indicated here by
11911 |sz|) slightly affects the written format: whether the saved FPU
11912 IP and DP pointers are 64 or 32 bits. But the helper function
11913 we call simply writes zero bits in the relevant fields, which
11914 are 64 bits regardless of what REX.W is, and so it's good enough
11915 (iow, equally broken) in both cases. */
11916 IRTemp addr
= IRTemp_INVALID
;
11919 UChar modrm
= getUChar(delta
);
11920 vassert(!epartIsReg(modrm
)); /* ensured by caller */
11921 vassert(sz
== 4 || sz
== 8); /* ditto */
11923 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
11925 gen_SIGNAL_if_not_64_aligned(vbi
, addr
);
11927 DIP("%sxsave %s\n", sz
==8 ? "rex64/" : "", dis_buf
);
11929 /* VEX's caller is assumed to have checked this. */
11930 const ULong aSSUMED_XCR0_VALUE
= 7;
11932 IRTemp rfbm
= newTemp(Ity_I64
);
11937 unop(Iop_32Uto64
, getIRegRDX(4)), mkU8(32)),
11938 unop(Iop_32Uto64
, getIRegRAX(4))),
11939 mkU64(aSSUMED_XCR0_VALUE
)));
11941 gen_XSAVE_SEQUENCE(addr
, rfbm
);
11943 /* Finally, we need to update XSTATE_BV in the XSAVE header area, by
11944 OR-ing the RFBM value into it. */
11945 IRTemp addr_plus_512
= newTemp(Ity_I64
);
11946 assign(addr_plus_512
, binop(Iop_Add64
, mkexpr(addr
), mkU64(512)));
11947 storeLE( mkexpr(addr_plus_512
),
11949 unop(Iop_64to8
, mkexpr(rfbm
)),
11950 loadLE(Ity_I8
, mkexpr(addr_plus_512
))) );
11956 static Long
dis_FXSAVE ( const VexAbiInfo
* vbi
,
11957 Prefix pfx
, Long delta
, Int sz
)
11959 /* See comment in dis_XSAVE about the significance of REX.W. */
11960 IRTemp addr
= IRTemp_INVALID
;
11963 UChar modrm
= getUChar(delta
);
11964 vassert(!epartIsReg(modrm
)); /* ensured by caller */
11965 vassert(sz
== 4 || sz
== 8); /* ditto */
11967 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
11969 gen_SIGNAL_if_not_16_aligned(vbi
, addr
);
11971 DIP("%sfxsave %s\n", sz
==8 ? "rex64/" : "", dis_buf
);
11973 /* FXSAVE is just XSAVE with components 0 and 1 selected. Set rfbm
11974 to 0b011, generate the XSAVE sequence accordingly, and let iropt
11975 fold out the unused (AVX) parts accordingly. */
11976 IRTemp rfbm
= newTemp(Ity_I64
);
11977 assign(rfbm
, mkU64(3));
11978 gen_XSAVE_SEQUENCE(addr
, rfbm
);
11984 static void gen_XRSTOR_SEQUENCE ( IRTemp addr
, IRTemp xstate_bv
, IRTemp rfbm
)
11986 /* ------ rfbm[0] gates the x87 state ------ */
11988 /* If rfbm[0] == 1, we have to write the x87 state. If
11989 xstate_bv[0] == 1, we will read it from the memory image, else
11990 we'll set it to initial values. Doing this with a helper
11991 function and getting the definedness flow annotations correct is
11992 too difficult, so generate stupid but simple code: first set the
11993 registers to initial values, regardless of xstate_bv[0]. Then,
11994 conditionally restore from the memory image. */
11996 IRTemp rfbm_0
= newTemp(Ity_I64
);
11997 IRTemp xstate_bv_0
= newTemp(Ity_I64
);
11998 IRTemp restore_0
= newTemp(Ity_I64
);
11999 assign(rfbm_0
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(1)));
12000 assign(xstate_bv_0
, binop(Iop_And64
, mkexpr(xstate_bv
), mkU64(1)));
12001 assign(restore_0
, binop(Iop_And64
, mkexpr(rfbm_0
), mkexpr(xstate_bv_0
)));
12003 gen_FINIT_SEQUENCE( binop(Iop_CmpNE64
, mkexpr(rfbm_0
), mkU64(0)) );
12005 /* Uses dirty helper:
12006 void amd64g_do_XRSTOR_COMPONENT_0 ( VexGuestAMD64State*, ULong )
12008 IRDirty
* d0
= unsafeIRDirty_0_N (
12010 "amd64g_dirtyhelper_XRSTOR_COMPONENT_0",
12011 &amd64g_dirtyhelper_XRSTOR_COMPONENT_0
,
12012 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
12014 d0
->guard
= binop(Iop_CmpNE64
, mkexpr(restore_0
), mkU64(0));
12016 /* Declare we're reading memory. Really, bytes 24 through 31
12017 (MXCSR and MXCSR_MASK) aren't read, but we can't express more
12018 than 1 memory area here, so just mark the whole thing as
12020 d0
->mFx
= Ifx_Read
;
12021 d0
->mAddr
= mkexpr(addr
);
12024 /* declare we're writing guest state */
12026 vex_bzero(&d0
->fxState
, sizeof(d0
->fxState
));
12028 d0
->fxState
[0].fx
= Ifx_Write
;
12029 d0
->fxState
[0].offset
= OFFB_FTOP
;
12030 d0
->fxState
[0].size
= sizeof(UInt
);
12032 d0
->fxState
[1].fx
= Ifx_Write
;
12033 d0
->fxState
[1].offset
= OFFB_FPREGS
;
12034 d0
->fxState
[1].size
= 8 * sizeof(ULong
);
12036 d0
->fxState
[2].fx
= Ifx_Write
;
12037 d0
->fxState
[2].offset
= OFFB_FPTAGS
;
12038 d0
->fxState
[2].size
= 8 * sizeof(UChar
);
12040 d0
->fxState
[3].fx
= Ifx_Write
;
12041 d0
->fxState
[3].offset
= OFFB_FPROUND
;
12042 d0
->fxState
[3].size
= sizeof(ULong
);
12044 d0
->fxState
[4].fx
= Ifx_Write
;
12045 d0
->fxState
[4].offset
= OFFB_FC3210
;
12046 d0
->fxState
[4].size
= sizeof(ULong
);
12048 stmt( IRStmt_Dirty(d0
) );
12050 /* ------ rfbm[1] gates the SSE state ------ */
12052 /* Same scheme as component 0: first zero it out, and then possibly
12053 restore from the memory area. */
12054 IRTemp rfbm_1
= newTemp(Ity_I64
);
12055 IRTemp xstate_bv_1
= newTemp(Ity_I64
);
12056 IRTemp restore_1
= newTemp(Ity_I64
);
12057 assign(rfbm_1
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(2)));
12058 assign(xstate_bv_1
, binop(Iop_And64
, mkexpr(xstate_bv
), mkU64(2)));
12059 assign(restore_1
, binop(Iop_And64
, mkexpr(rfbm_1
), mkexpr(xstate_bv_1
)));
12060 IRExpr
* rfbm_1e
= binop(Iop_CmpNE64
, mkexpr(rfbm_1
), mkU64(0));
12061 IRExpr
* restore_1e
= binop(Iop_CmpNE64
, mkexpr(restore_1
), mkU64(0));
12063 IRTemp rfbm_1or2
= newTemp(Ity_I64
);
12064 IRTemp xstate_bv_1or2
= newTemp(Ity_I64
);
12065 IRTemp restore_1or2
= newTemp(Ity_I64
);
12066 assign(rfbm_1or2
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(6)));
12067 assign(xstate_bv_1or2
, binop(Iop_And64
, mkexpr(xstate_bv
), mkU64(6)));
12068 assign(restore_1or2
, binop(Iop_And64
, mkexpr(rfbm_1or2
),
12069 mkexpr(xstate_bv_1or2
)));
12070 IRExpr
* rfbm_1or2e
= binop(Iop_CmpNE64
, mkexpr(rfbm_1or2
), mkU64(0));
12071 IRExpr
* restore_1or2e
= binop(Iop_CmpNE64
, mkexpr(restore_1or2
), mkU64(0));
12073 /* The areas in question are: SSEROUND, and the XMM register array. */
12074 putGuarded(OFFB_SSEROUND
, rfbm_1or2e
, mkU64(Irrm_NEAREST
));
12077 for (reg
= 0; reg
< 16; reg
++) {
12078 putGuarded(xmmGuestRegOffset(reg
), rfbm_1e
, mkV128(0));
12081 /* And now possibly restore from MXCSR/MXCSR_MASK */
12082 /* Uses dirty helper:
12083 void amd64g_do_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
12084 ( VexGuestAMD64State*, ULong )
12085 This restores from only MXCSR and MXCSR_MASK. We need to do
12086 this if either components 1 (SSE) or 2 (AVX) are requested.
12087 Hence the guard condition is a bit more complex.
12089 IRDirty
* d1
= unsafeIRDirty_0_N (
12091 "amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS",
12092 &amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
,
12093 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr
) )
12095 d1
->guard
= restore_1or2e
;
12097 /* Declare we're reading memory: MXCSR and MXCSR_MASK. Note that
12098 the code for rbfm[0] just above claims a read of 0 .. 159, so
12099 this duplicates it. But at least correctly connects 24 .. 31 to
12100 the MXCSR guest state representation (SSEROUND field). */
12101 d1
->mFx
= Ifx_Read
;
12102 d1
->mAddr
= binop(Iop_Add64
, mkexpr(addr
), mkU64(24));
12105 /* declare we're writing guest state */
12107 vex_bzero(&d1
->fxState
, sizeof(d1
->fxState
));
12109 d1
->fxState
[0].fx
= Ifx_Write
;
12110 d1
->fxState
[0].offset
= OFFB_SSEROUND
;
12111 d1
->fxState
[0].size
= sizeof(ULong
);
12113 /* Call the helper. This creates SSEROUND but nothing
12114 else. We do the actual register array, XMM[0..15], separately,
12115 in order that any undefinedness in the XMM registers is tracked
12116 separately by Memcheck and is not "infected" by the in-memory
12117 shadow for the other parts of the image. */
12118 stmt( IRStmt_Dirty(d1
) );
12120 /* And now the XMMs themselves. For each register, we PUT either
12121 its old value, or the value loaded from memory. One convenient
12122 way to do that is with a conditional load that has its the
12123 default value, the old value of the register. */
12124 for (reg
= 0; reg
< 16; reg
++) {
12125 IRExpr
* ea
= binop(Iop_Add64
, mkexpr(addr
), mkU64(160 + reg
* 16));
12126 IRExpr
* alt
= getXMMReg(reg
);
12127 IRTemp loadedValue
= newTemp(Ity_V128
);
12128 stmt( IRStmt_LoadG(Iend_LE
,
12130 loadedValue
, ea
, alt
, restore_1e
) );
12131 putXMMReg(reg
, mkexpr(loadedValue
));
12134 /* ------ rfbm[2] gates the AVX state ------ */
12135 /* Component 2 is just a bunch of register loads, so we'll do it
12136 inline, just to be simple and to be Memcheck friendly. */
12138 /* Same scheme as component 0: first zero it out, and then possibly
12139 restore from the memory area. */
12140 IRTemp rfbm_2
= newTemp(Ity_I64
);
12141 IRTemp xstate_bv_2
= newTemp(Ity_I64
);
12142 IRTemp restore_2
= newTemp(Ity_I64
);
12143 assign(rfbm_2
, binop(Iop_And64
, mkexpr(rfbm
), mkU64(4)));
12144 assign(xstate_bv_2
, binop(Iop_And64
, mkexpr(xstate_bv
), mkU64(4)));
12145 assign(restore_2
, binop(Iop_And64
, mkexpr(rfbm_2
), mkexpr(xstate_bv_2
)));
12147 IRExpr
* rfbm_2e
= binop(Iop_CmpNE64
, mkexpr(rfbm_2
), mkU64(0));
12148 IRExpr
* restore_2e
= binop(Iop_CmpNE64
, mkexpr(restore_2
), mkU64(0));
12150 for (reg
= 0; reg
< 16; reg
++) {
12151 putGuarded(ymmGuestRegLane128offset(reg
, 1), rfbm_2e
, mkV128(0));
12154 for (reg
= 0; reg
< 16; reg
++) {
12155 IRExpr
* ea
= binop(Iop_Add64
, mkexpr(addr
), mkU64(576 + reg
* 16));
12156 IRExpr
* alt
= getYMMRegLane128(reg
, 1);
12157 IRTemp loadedValue
= newTemp(Ity_V128
);
12158 stmt( IRStmt_LoadG(Iend_LE
,
12160 loadedValue
, ea
, alt
, restore_2e
) );
12161 putYMMRegLane128(reg
, 1, mkexpr(loadedValue
));
12166 static Long
dis_XRSTOR ( const VexAbiInfo
* vbi
,
12167 Prefix pfx
, Long delta
, Int sz
)
12169 /* As with XRSTOR above we ignore the value of REX.W since we're
12170 not bothering with the FPU DP and IP fields. */
12171 IRTemp addr
= IRTemp_INVALID
;
12174 UChar modrm
= getUChar(delta
);
12175 vassert(!epartIsReg(modrm
)); /* ensured by caller */
12176 vassert(sz
== 4 || sz
== 8); /* ditto */
12178 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12180 gen_SIGNAL_if_not_64_aligned(vbi
, addr
);
12182 DIP("%sxrstor %s\n", sz
==8 ? "rex64/" : "", dis_buf
);
12184 /* VEX's caller is assumed to have checked this. */
12185 const ULong aSSUMED_XCR0_VALUE
= 7;
12187 IRTemp rfbm
= newTemp(Ity_I64
);
12192 unop(Iop_32Uto64
, getIRegRDX(4)), mkU8(32)),
12193 unop(Iop_32Uto64
, getIRegRAX(4))),
12194 mkU64(aSSUMED_XCR0_VALUE
)));
12196 IRTemp xstate_bv
= newTemp(Ity_I64
);
12197 assign(xstate_bv
, loadLE(Ity_I64
,
12198 binop(Iop_Add64
, mkexpr(addr
), mkU64(512+0))));
12200 IRTemp xcomp_bv
= newTemp(Ity_I64
);
12201 assign(xcomp_bv
, loadLE(Ity_I64
,
12202 binop(Iop_Add64
, mkexpr(addr
), mkU64(512+8))));
12204 IRTemp xsavehdr_23_16
= newTemp(Ity_I64
);
12205 assign( xsavehdr_23_16
,
12207 binop(Iop_Add64
, mkexpr(addr
), mkU64(512+16))));
12209 /* We must fault if
12210 * xcomp_bv[63] == 1, since this simulated CPU does not support
12211 the compaction extension.
12212 * xstate_bv sets a bit outside of XCR0 (which we assume to be 7).
12213 * any of the xsave header bytes 23 .. 8 are nonzero. This seems to
12214 imply that xcomp_bv must be zero.
12215 xcomp_bv is header bytes 15 .. 8 and xstate_bv is header bytes 7 .. 0
12217 IRTemp fault_if_nonzero
= newTemp(Ity_I64
);
12218 assign(fault_if_nonzero
,
12220 binop(Iop_And64
, mkexpr(xstate_bv
), mkU64(~aSSUMED_XCR0_VALUE
)),
12221 binop(Iop_Or64
, mkexpr(xcomp_bv
), mkexpr(xsavehdr_23_16
))));
12222 stmt( IRStmt_Exit(binop(Iop_CmpNE64
, mkexpr(fault_if_nonzero
), mkU64(0)),
12224 IRConst_U64(guest_RIP_curr_instr
),
12228 /* We are guaranteed now that both xstate_bv and rfbm are in the
12229 range 0 .. 7. Generate the restore sequence proper. */
12230 gen_XRSTOR_SEQUENCE(addr
, xstate_bv
, rfbm
);
12236 static Long
dis_FXRSTOR ( const VexAbiInfo
* vbi
,
12237 Prefix pfx
, Long delta
, Int sz
)
12239 /* As with FXSAVE above we ignore the value of REX.W since we're
12240 not bothering with the FPU DP and IP fields. */
12241 IRTemp addr
= IRTemp_INVALID
;
12244 UChar modrm
= getUChar(delta
);
12245 vassert(!epartIsReg(modrm
)); /* ensured by caller */
12246 vassert(sz
== 4 || sz
== 8); /* ditto */
12248 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12250 gen_SIGNAL_if_not_16_aligned(vbi
, addr
);
12252 DIP("%sfxrstor %s\n", sz
==8 ? "rex64/" : "", dis_buf
);
12254 /* FXRSTOR is just XRSTOR with components 0 and 1 selected and also
12255 as if components 0 and 1 are set as present in XSTATE_BV in the
12256 XSAVE header. Set both rfbm and xstate_bv to 0b011 therefore,
12257 generate the XRSTOR sequence accordingly, and let iropt fold out
12258 the unused (AVX) parts accordingly. */
12259 IRTemp three
= newTemp(Ity_I64
);
12260 assign(three
, mkU64(3));
12261 gen_XRSTOR_SEQUENCE(addr
, three
/*xstate_bv*/, three
/*rfbm*/);
12267 static IRTemp
math_PINSRW_128 ( IRTemp v128
, IRTemp u16
, UInt imm8
)
12269 vassert(imm8
>= 0 && imm8
<= 7);
12271 // Create a V128 value which has the selected word in the
12272 // specified lane, and zeroes everywhere else.
12273 IRTemp tmp128
= newTemp(Ity_V128
);
12274 IRTemp halfshift
= newTemp(Ity_I64
);
12275 assign(halfshift
, binop(Iop_Shl64
,
12276 unop(Iop_16Uto64
, mkexpr(u16
)),
12277 mkU8(16 * (imm8
& 3))));
12279 assign(tmp128
, binop(Iop_64HLtoV128
, mkU64(0), mkexpr(halfshift
)));
12281 assign(tmp128
, binop(Iop_64HLtoV128
, mkexpr(halfshift
), mkU64(0)));
12284 UShort mask
= ~(3 << (imm8
* 2));
12285 IRTemp res
= newTemp(Ity_V128
);
12286 assign( res
, binop(Iop_OrV128
,
12288 binop(Iop_AndV128
, mkexpr(v128
), mkV128(mask
))) );
12293 static IRTemp
math_PSADBW_128 ( IRTemp dV
, IRTemp sV
)
12295 IRTemp s1
, s0
, d1
, d0
;
12296 s1
= s0
= d1
= d0
= IRTemp_INVALID
;
12298 breakupV128to64s( sV
, &s1
, &s0
);
12299 breakupV128to64s( dV
, &d1
, &d0
);
12301 IRTemp res
= newTemp(Ity_V128
);
12303 binop(Iop_64HLtoV128
,
12304 mkIRExprCCall(Ity_I64
, 0/*regparms*/,
12305 "amd64g_calculate_mmx_psadbw",
12306 &amd64g_calculate_mmx_psadbw
,
12307 mkIRExprVec_2( mkexpr(s1
), mkexpr(d1
))),
12308 mkIRExprCCall(Ity_I64
, 0/*regparms*/,
12309 "amd64g_calculate_mmx_psadbw",
12310 &amd64g_calculate_mmx_psadbw
,
12311 mkIRExprVec_2( mkexpr(s0
), mkexpr(d0
)))) );
12316 static IRTemp
math_PSADBW_256 ( IRTemp dV
, IRTemp sV
)
12318 IRTemp sHi
, sLo
, dHi
, dLo
;
12319 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
12320 breakupV256toV128s( dV
, &dHi
, &dLo
);
12321 breakupV256toV128s( sV
, &sHi
, &sLo
);
12322 IRTemp res
= newTemp(Ity_V256
);
12323 assign(res
, binop(Iop_V128HLtoV256
,
12324 mkexpr(math_PSADBW_128(dHi
, sHi
)),
12325 mkexpr(math_PSADBW_128(dLo
, sLo
))));
12330 static Long
dis_MASKMOVDQU ( const VexAbiInfo
* vbi
, Prefix pfx
,
12331 Long delta
, Bool isAvx
)
12333 IRTemp regD
= newTemp(Ity_V128
);
12334 IRTemp mask
= newTemp(Ity_V128
);
12335 IRTemp olddata
= newTemp(Ity_V128
);
12336 IRTemp newdata
= newTemp(Ity_V128
);
12337 IRTemp addr
= newTemp(Ity_I64
);
12338 UChar modrm
= getUChar(delta
);
12339 UInt rG
= gregOfRexRM(pfx
,modrm
);
12340 UInt rE
= eregOfRexRM(pfx
,modrm
);
12342 assign( addr
, handleAddrOverrides( vbi
, pfx
, getIReg64(R_RDI
) ));
12343 assign( regD
, getXMMReg( rG
));
12345 /* Unfortunately can't do the obvious thing with SarN8x16
12346 here since that can't be re-emitted as SSE2 code - no such
12349 binop(Iop_64HLtoV128
,
12351 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 1 ),
12354 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 0 ),
12356 assign( olddata
, loadLE( Ity_V128
, mkexpr(addr
) ));
12357 assign( newdata
, binop(Iop_OrV128
,
12363 unop(Iop_NotV128
, mkexpr(mask
)))) );
12364 storeLE( mkexpr(addr
), mkexpr(newdata
) );
12367 DIP("%smaskmovdqu %s,%s\n", isAvx
? "v" : "",
12368 nameXMMReg(rE
), nameXMMReg(rG
) );
12373 static Long
dis_MOVMSKPS_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
12374 Long delta
, Bool isAvx
)
12376 UChar modrm
= getUChar(delta
);
12377 UInt rG
= gregOfRexRM(pfx
,modrm
);
12378 UInt rE
= eregOfRexRM(pfx
,modrm
);
12379 IRTemp t0
= newTemp(Ity_I32
);
12380 IRTemp t1
= newTemp(Ity_I32
);
12381 IRTemp t2
= newTemp(Ity_I32
);
12382 IRTemp t3
= newTemp(Ity_I32
);
12384 assign( t0
, binop( Iop_And32
,
12385 binop(Iop_Shr32
, getXMMRegLane32(rE
,0), mkU8(31)),
12387 assign( t1
, binop( Iop_And32
,
12388 binop(Iop_Shr32
, getXMMRegLane32(rE
,1), mkU8(30)),
12390 assign( t2
, binop( Iop_And32
,
12391 binop(Iop_Shr32
, getXMMRegLane32(rE
,2), mkU8(29)),
12393 assign( t3
, binop( Iop_And32
,
12394 binop(Iop_Shr32
, getXMMRegLane32(rE
,3), mkU8(28)),
12396 putIReg32( rG
, binop(Iop_Or32
,
12397 binop(Iop_Or32
, mkexpr(t0
), mkexpr(t1
)),
12398 binop(Iop_Or32
, mkexpr(t2
), mkexpr(t3
)) ) );
12399 DIP("%smovmskps %s,%s\n", isAvx
? "v" : "",
12400 nameXMMReg(rE
), nameIReg32(rG
));
12405 static Long
dis_MOVMSKPS_256 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
)
12407 UChar modrm
= getUChar(delta
);
12408 UInt rG
= gregOfRexRM(pfx
,modrm
);
12409 UInt rE
= eregOfRexRM(pfx
,modrm
);
12410 IRTemp t0
= newTemp(Ity_I32
);
12411 IRTemp t1
= newTemp(Ity_I32
);
12412 IRTemp t2
= newTemp(Ity_I32
);
12413 IRTemp t3
= newTemp(Ity_I32
);
12414 IRTemp t4
= newTemp(Ity_I32
);
12415 IRTemp t5
= newTemp(Ity_I32
);
12416 IRTemp t6
= newTemp(Ity_I32
);
12417 IRTemp t7
= newTemp(Ity_I32
);
12419 assign( t0
, binop( Iop_And32
,
12420 binop(Iop_Shr32
, getYMMRegLane32(rE
,0), mkU8(31)),
12422 assign( t1
, binop( Iop_And32
,
12423 binop(Iop_Shr32
, getYMMRegLane32(rE
,1), mkU8(30)),
12425 assign( t2
, binop( Iop_And32
,
12426 binop(Iop_Shr32
, getYMMRegLane32(rE
,2), mkU8(29)),
12428 assign( t3
, binop( Iop_And32
,
12429 binop(Iop_Shr32
, getYMMRegLane32(rE
,3), mkU8(28)),
12431 assign( t4
, binop( Iop_And32
,
12432 binop(Iop_Shr32
, getYMMRegLane32(rE
,4), mkU8(27)),
12434 assign( t5
, binop( Iop_And32
,
12435 binop(Iop_Shr32
, getYMMRegLane32(rE
,5), mkU8(26)),
12437 assign( t6
, binop( Iop_And32
,
12438 binop(Iop_Shr32
, getYMMRegLane32(rE
,6), mkU8(25)),
12440 assign( t7
, binop( Iop_And32
,
12441 binop(Iop_Shr32
, getYMMRegLane32(rE
,7), mkU8(24)),
12443 putIReg32( rG
, binop(Iop_Or32
,
12445 binop(Iop_Or32
, mkexpr(t0
), mkexpr(t1
)),
12446 binop(Iop_Or32
, mkexpr(t2
), mkexpr(t3
)) ),
12448 binop(Iop_Or32
, mkexpr(t4
), mkexpr(t5
)),
12449 binop(Iop_Or32
, mkexpr(t6
), mkexpr(t7
)) ) ) );
12450 DIP("vmovmskps %s,%s\n", nameYMMReg(rE
), nameIReg32(rG
));
12455 static Long
dis_MOVMSKPD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
12456 Long delta
, Bool isAvx
)
12458 UChar modrm
= getUChar(delta
);
12459 UInt rG
= gregOfRexRM(pfx
,modrm
);
12460 UInt rE
= eregOfRexRM(pfx
,modrm
);
12461 IRTemp t0
= newTemp(Ity_I32
);
12462 IRTemp t1
= newTemp(Ity_I32
);
12464 assign( t0
, binop( Iop_And32
,
12465 binop(Iop_Shr32
, getXMMRegLane32(rE
,1), mkU8(31)),
12467 assign( t1
, binop( Iop_And32
,
12468 binop(Iop_Shr32
, getXMMRegLane32(rE
,3), mkU8(30)),
12470 putIReg32( rG
, binop(Iop_Or32
, mkexpr(t0
), mkexpr(t1
) ) );
12471 DIP("%smovmskpd %s,%s\n", isAvx
? "v" : "",
12472 nameXMMReg(rE
), nameIReg32(rG
));
12477 static Long
dis_MOVMSKPD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
)
12479 UChar modrm
= getUChar(delta
);
12480 UInt rG
= gregOfRexRM(pfx
,modrm
);
12481 UInt rE
= eregOfRexRM(pfx
,modrm
);
12482 IRTemp t0
= newTemp(Ity_I32
);
12483 IRTemp t1
= newTemp(Ity_I32
);
12484 IRTemp t2
= newTemp(Ity_I32
);
12485 IRTemp t3
= newTemp(Ity_I32
);
12487 assign( t0
, binop( Iop_And32
,
12488 binop(Iop_Shr32
, getYMMRegLane32(rE
,1), mkU8(31)),
12490 assign( t1
, binop( Iop_And32
,
12491 binop(Iop_Shr32
, getYMMRegLane32(rE
,3), mkU8(30)),
12493 assign( t2
, binop( Iop_And32
,
12494 binop(Iop_Shr32
, getYMMRegLane32(rE
,5), mkU8(29)),
12496 assign( t3
, binop( Iop_And32
,
12497 binop(Iop_Shr32
, getYMMRegLane32(rE
,7), mkU8(28)),
12499 putIReg32( rG
, binop(Iop_Or32
,
12500 binop(Iop_Or32
, mkexpr(t0
), mkexpr(t1
)),
12501 binop(Iop_Or32
, mkexpr(t2
), mkexpr(t3
)) ) );
12502 DIP("vmovmskps %s,%s\n", nameYMMReg(rE
), nameIReg32(rG
));
12507 /* Note, this also handles SSE(1) insns. */
12508 __attribute__((noinline
))
12510 Long
dis_ESC_0F__SSE2 ( Bool
* decode_OK
,
12511 const VexArchInfo
* archinfo
,
12512 const VexAbiInfo
* vbi
,
12513 Prefix pfx
, Int sz
, Long deltaIN
,
12516 IRTemp addr
= IRTemp_INVALID
;
12517 IRTemp t0
= IRTemp_INVALID
;
12518 IRTemp t1
= IRTemp_INVALID
;
12519 IRTemp t2
= IRTemp_INVALID
;
12520 IRTemp t3
= IRTemp_INVALID
;
12521 IRTemp t4
= IRTemp_INVALID
;
12522 IRTemp t5
= IRTemp_INVALID
;
12523 IRTemp t6
= IRTemp_INVALID
;
12528 *decode_OK
= False
;
12530 Long delta
= deltaIN
;
12531 UChar opc
= getUChar(delta
);
12536 if (have66noF2noF3(pfx
)
12537 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12538 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
12539 modrm
= getUChar(delta
);
12540 if (epartIsReg(modrm
)) {
12541 putXMMReg( gregOfRexRM(pfx
,modrm
),
12542 getXMMReg( eregOfRexRM(pfx
,modrm
) ));
12543 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12544 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12547 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12548 putXMMReg( gregOfRexRM(pfx
,modrm
),
12549 loadLE(Ity_V128
, mkexpr(addr
)) );
12550 DIP("movupd %s,%s\n", dis_buf
,
12551 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12554 goto decode_success
;
12556 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
12557 G (lo half xmm). If E is mem, upper half of G is zeroed out.
12558 If E is reg, upper half of G is unchanged. */
12559 if (haveF2no66noF3(pfx
)
12560 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8) ) {
12561 modrm
= getUChar(delta
);
12562 if (epartIsReg(modrm
)) {
12563 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0,
12564 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 0 ));
12565 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12566 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12569 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12570 putXMMReg( gregOfRexRM(pfx
,modrm
), mkV128(0) );
12571 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0,
12572 loadLE(Ity_I64
, mkexpr(addr
)) );
12573 DIP("movsd %s,%s\n", dis_buf
,
12574 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12577 goto decode_success
;
12579 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
12580 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
12581 if (haveF3no66noF2(pfx
)
12582 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12583 modrm
= getUChar(delta
);
12584 if (epartIsReg(modrm
)) {
12585 putXMMRegLane32( gregOfRexRM(pfx
,modrm
), 0,
12586 getXMMRegLane32( eregOfRexRM(pfx
,modrm
), 0 ));
12587 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12588 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12591 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12592 putXMMReg( gregOfRexRM(pfx
,modrm
), mkV128(0) );
12593 putXMMRegLane32( gregOfRexRM(pfx
,modrm
), 0,
12594 loadLE(Ity_I32
, mkexpr(addr
)) );
12595 DIP("movss %s,%s\n", dis_buf
,
12596 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12599 goto decode_success
;
12601 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
12602 if (haveNo66noF2noF3(pfx
)
12603 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12604 modrm
= getUChar(delta
);
12605 if (epartIsReg(modrm
)) {
12606 putXMMReg( gregOfRexRM(pfx
,modrm
),
12607 getXMMReg( eregOfRexRM(pfx
,modrm
) ));
12608 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12609 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12612 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12613 putXMMReg( gregOfRexRM(pfx
,modrm
),
12614 loadLE(Ity_V128
, mkexpr(addr
)) );
12615 DIP("movups %s,%s\n", dis_buf
,
12616 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12619 goto decode_success
;
12624 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
12625 or lo half xmm). */
12626 if (haveF2no66noF3(pfx
)
12627 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12628 modrm
= getUChar(delta
);
12629 if (epartIsReg(modrm
)) {
12630 putXMMRegLane64( eregOfRexRM(pfx
,modrm
), 0,
12631 getXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0 ));
12632 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12633 nameXMMReg(eregOfRexRM(pfx
,modrm
)));
12636 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12637 storeLE( mkexpr(addr
),
12638 getXMMRegLane64(gregOfRexRM(pfx
,modrm
), 0) );
12639 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12643 goto decode_success
;
12645 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
12647 if (haveF3no66noF2(pfx
) && sz
== 4) {
12648 modrm
= getUChar(delta
);
12649 if (epartIsReg(modrm
)) {
12650 /* fall through, we don't yet have a test case */
12652 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12653 storeLE( mkexpr(addr
),
12654 getXMMRegLane32(gregOfRexRM(pfx
,modrm
), 0) );
12655 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12658 goto decode_success
;
12661 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
12662 if (have66noF2noF3(pfx
)
12663 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12664 modrm
= getUChar(delta
);
12665 if (epartIsReg(modrm
)) {
12666 putXMMReg( eregOfRexRM(pfx
,modrm
),
12667 getXMMReg( gregOfRexRM(pfx
,modrm
) ) );
12668 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12669 nameXMMReg(eregOfRexRM(pfx
,modrm
)));
12672 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12673 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
12674 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12678 goto decode_success
;
12680 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
12681 if (haveNo66noF2noF3(pfx
)
12682 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12683 modrm
= getUChar(delta
);
12684 if (epartIsReg(modrm
)) {
12685 /* fall through; awaiting test case */
12687 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12688 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
12689 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12692 goto decode_success
;
12698 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
12699 /* Identical to MOVLPS ? */
12700 if (have66noF2noF3(pfx
)
12701 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12702 modrm
= getUChar(delta
);
12703 if (epartIsReg(modrm
)) {
12704 /* fall through; apparently reg-reg is not possible */
12706 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12708 putXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12710 loadLE(Ity_I64
, mkexpr(addr
)) );
12711 DIP("movlpd %s, %s\n",
12712 dis_buf
, nameXMMReg( gregOfRexRM(pfx
,modrm
) ));
12713 goto decode_success
;
12716 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
12717 /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. */
12718 if (haveNo66noF2noF3(pfx
)
12719 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12720 modrm
= getUChar(delta
);
12721 if (epartIsReg(modrm
)) {
12723 putXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12725 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 1 ));
12726 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12727 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12729 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12731 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0/*lower lane*/,
12732 loadLE(Ity_I64
, mkexpr(addr
)) );
12733 DIP("movlps %s, %s\n",
12734 dis_buf
, nameXMMReg( gregOfRexRM(pfx
,modrm
) ));
12736 goto decode_success
;
12741 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
12742 if (haveNo66noF2noF3(pfx
)
12743 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12744 modrm
= getUChar(delta
);
12745 if (!epartIsReg(modrm
)) {
12746 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12748 storeLE( mkexpr(addr
),
12749 getXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12750 0/*lower lane*/ ) );
12751 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx
,modrm
) ),
12753 goto decode_success
;
12755 /* else fall through */
12757 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
12758 /* Identical to MOVLPS ? */
12759 if (have66noF2noF3(pfx
)
12760 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12761 modrm
= getUChar(delta
);
12762 if (!epartIsReg(modrm
)) {
12763 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12765 storeLE( mkexpr(addr
),
12766 getXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12767 0/*lower lane*/ ) );
12768 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx
,modrm
) ),
12770 goto decode_success
;
12772 /* else fall through */
12778 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
12779 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
12780 /* These just appear to be special cases of SHUFPS */
12781 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
12782 Bool hi
= toBool(opc
== 0x15);
12783 IRTemp sV
= newTemp(Ity_V128
);
12784 IRTemp dV
= newTemp(Ity_V128
);
12785 modrm
= getUChar(delta
);
12786 UInt rG
= gregOfRexRM(pfx
,modrm
);
12787 assign( dV
, getXMMReg(rG
) );
12788 if (epartIsReg(modrm
)) {
12789 UInt rE
= eregOfRexRM(pfx
,modrm
);
12790 assign( sV
, getXMMReg(rE
) );
12792 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
12793 nameXMMReg(rE
), nameXMMReg(rG
));
12795 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12796 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12798 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
12799 dis_buf
, nameXMMReg(rG
));
12801 IRTemp res
= math_UNPCKxPS_128( sV
, dV
, hi
);
12802 putXMMReg( rG
, mkexpr(res
) );
12803 goto decode_success
;
12805 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
12806 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
12807 /* These just appear to be special cases of SHUFPS */
12808 if (have66noF2noF3(pfx
)
12809 && sz
== 2 /* could be 8 if rex also present */) {
12810 Bool hi
= toBool(opc
== 0x15);
12811 IRTemp sV
= newTemp(Ity_V128
);
12812 IRTemp dV
= newTemp(Ity_V128
);
12813 modrm
= getUChar(delta
);
12814 UInt rG
= gregOfRexRM(pfx
,modrm
);
12815 assign( dV
, getXMMReg(rG
) );
12816 if (epartIsReg(modrm
)) {
12817 UInt rE
= eregOfRexRM(pfx
,modrm
);
12818 assign( sV
, getXMMReg(rE
) );
12820 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
12821 nameXMMReg(rE
), nameXMMReg(rG
));
12823 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12824 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
12826 DIP("unpck%sps %s,%s\n", hi
? "h" : "l",
12827 dis_buf
, nameXMMReg(rG
));
12829 IRTemp res
= math_UNPCKxPD_128( sV
, dV
, hi
);
12830 putXMMReg( rG
, mkexpr(res
) );
12831 goto decode_success
;
12836 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
12837 /* These seems identical to MOVHPS. This instruction encoding is
12838 completely crazy. */
12839 if (have66noF2noF3(pfx
)
12840 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12841 modrm
= getUChar(delta
);
12842 if (epartIsReg(modrm
)) {
12843 /* fall through; apparently reg-reg is not possible */
12845 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12847 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 1/*upper lane*/,
12848 loadLE(Ity_I64
, mkexpr(addr
)) );
12849 DIP("movhpd %s,%s\n", dis_buf
,
12850 nameXMMReg( gregOfRexRM(pfx
,modrm
) ));
12851 goto decode_success
;
12854 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
12855 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
12856 if (haveNo66noF2noF3(pfx
)
12857 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12858 modrm
= getUChar(delta
);
12859 if (epartIsReg(modrm
)) {
12861 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 1/*upper lane*/,
12862 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 0 ) );
12863 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12864 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12866 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12868 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 1/*upper lane*/,
12869 loadLE(Ity_I64
, mkexpr(addr
)) );
12870 DIP("movhps %s,%s\n", dis_buf
,
12871 nameXMMReg( gregOfRexRM(pfx
,modrm
) ));
12873 goto decode_success
;
12878 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
12879 if (haveNo66noF2noF3(pfx
)
12880 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12881 modrm
= getUChar(delta
);
12882 if (!epartIsReg(modrm
)) {
12883 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12885 storeLE( mkexpr(addr
),
12886 getXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12887 1/*upper lane*/ ) );
12888 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx
,modrm
) ),
12890 goto decode_success
;
12892 /* else fall through */
12894 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
12895 /* Again, this seems identical to MOVHPS. */
12896 if (have66noF2noF3(pfx
)
12897 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12898 modrm
= getUChar(delta
);
12899 if (!epartIsReg(modrm
)) {
12900 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12902 storeLE( mkexpr(addr
),
12903 getXMMRegLane64( gregOfRexRM(pfx
,modrm
),
12904 1/*upper lane*/ ) );
12905 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx
,modrm
) ),
12907 goto decode_success
;
12909 /* else fall through */
12914 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
12915 /* 0F 18 /1 = PREFETCH0 -- with various different hints */
12916 /* 0F 18 /2 = PREFETCH1 */
12917 /* 0F 18 /3 = PREFETCH2 */
12918 if (haveNo66noF2noF3(pfx
)
12919 && !epartIsReg(getUChar(delta
))
12920 && gregLO3ofRM(getUChar(delta
)) >= 0
12921 && gregLO3ofRM(getUChar(delta
)) <= 3) {
12922 const HChar
* hintstr
= "??";
12924 modrm
= getUChar(delta
);
12925 vassert(!epartIsReg(modrm
));
12927 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12930 switch (gregLO3ofRM(modrm
)) {
12931 case 0: hintstr
= "nta"; break;
12932 case 1: hintstr
= "t0"; break;
12933 case 2: hintstr
= "t1"; break;
12934 case 3: hintstr
= "t2"; break;
12935 default: vassert(0);
12938 DIP("prefetch%s %s\n", hintstr
, dis_buf
);
12939 goto decode_success
;
12944 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
12945 if (have66noF2noF3(pfx
)
12946 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
12947 modrm
= getUChar(delta
);
12948 if (epartIsReg(modrm
)) {
12949 putXMMReg( gregOfRexRM(pfx
,modrm
),
12950 getXMMReg( eregOfRexRM(pfx
,modrm
) ));
12951 DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12952 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12955 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12956 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
12957 putXMMReg( gregOfRexRM(pfx
,modrm
),
12958 loadLE(Ity_V128
, mkexpr(addr
)) );
12959 DIP("movapd %s,%s\n", dis_buf
,
12960 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12963 goto decode_success
;
12965 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
12966 if (haveNo66noF2noF3(pfx
)
12967 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12968 modrm
= getUChar(delta
);
12969 if (epartIsReg(modrm
)) {
12970 putXMMReg( gregOfRexRM(pfx
,modrm
),
12971 getXMMReg( eregOfRexRM(pfx
,modrm
) ));
12972 DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
12973 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12976 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
12977 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
12978 putXMMReg( gregOfRexRM(pfx
,modrm
),
12979 loadLE(Ity_V128
, mkexpr(addr
)) );
12980 DIP("movaps %s,%s\n", dis_buf
,
12981 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
12984 goto decode_success
;
12989 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
12990 if (haveNo66noF2noF3(pfx
)
12991 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
12992 modrm
= getUChar(delta
);
12993 if (epartIsReg(modrm
)) {
12994 putXMMReg( eregOfRexRM(pfx
,modrm
),
12995 getXMMReg( gregOfRexRM(pfx
,modrm
) ));
12996 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
12997 nameXMMReg(eregOfRexRM(pfx
,modrm
)));
13000 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13001 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
13002 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
13003 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
13007 goto decode_success
;
13009 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
13010 if (have66noF2noF3(pfx
)
13011 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
13012 modrm
= getUChar(delta
);
13013 if (epartIsReg(modrm
)) {
13014 putXMMReg( eregOfRexRM(pfx
,modrm
),
13015 getXMMReg( gregOfRexRM(pfx
,modrm
) ) );
13016 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
13017 nameXMMReg(eregOfRexRM(pfx
,modrm
)));
13020 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13021 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
13022 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
13023 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
13027 goto decode_success
;
13032 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
13034 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13035 IRTemp arg64
= newTemp(Ity_I64
);
13036 IRTemp rmode
= newTemp(Ity_I32
);
13038 modrm
= getUChar(delta
);
13039 if (epartIsReg(modrm
)) {
13040 /* Only switch to MMX mode if the source is a MMX register.
13041 See comments on CVTPI2PD for details. Fixes #357059. */
13043 assign( arg64
, getMMXReg(eregLO3ofRM(modrm
)) );
13045 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
13046 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13048 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13049 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
13051 DIP("cvtpi2ps %s,%s\n", dis_buf
,
13052 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
13055 assign( rmode
, get_sse_roundingmode() );
13058 gregOfRexRM(pfx
,modrm
), 0,
13059 binop(Iop_F64toF32
,
13061 unop(Iop_I32StoF64
,
13062 unop(Iop_64to32
, mkexpr(arg64
)) )) );
13065 gregOfRexRM(pfx
,modrm
), 1,
13066 binop(Iop_F64toF32
,
13068 unop(Iop_I32StoF64
,
13069 unop(Iop_64HIto32
, mkexpr(arg64
)) )) );
13071 goto decode_success
;
13073 /* F3 0F 2A = CVTSI2SS
13074 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
13075 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
13076 if (haveF3no66noF2(pfx
) && (sz
== 4 || sz
== 8)) {
13077 IRTemp rmode
= newTemp(Ity_I32
);
13078 assign( rmode
, get_sse_roundingmode() );
13079 modrm
= getUChar(delta
);
13081 IRTemp arg32
= newTemp(Ity_I32
);
13082 if (epartIsReg(modrm
)) {
13083 assign( arg32
, getIReg32(eregOfRexRM(pfx
,modrm
)) );
13085 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx
,modrm
)),
13086 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13088 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13089 assign( arg32
, loadLE(Ity_I32
, mkexpr(addr
)) );
13091 DIP("cvtsi2ss %s,%s\n", dis_buf
,
13092 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
13095 gregOfRexRM(pfx
,modrm
), 0,
13096 binop(Iop_F64toF32
,
13098 unop(Iop_I32StoF64
, mkexpr(arg32
)) ) );
13101 IRTemp arg64
= newTemp(Ity_I64
);
13102 if (epartIsReg(modrm
)) {
13103 assign( arg64
, getIReg64(eregOfRexRM(pfx
,modrm
)) );
13105 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx
,modrm
)),
13106 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13108 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13109 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
13111 DIP("cvtsi2ssq %s,%s\n", dis_buf
,
13112 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
13115 gregOfRexRM(pfx
,modrm
), 0,
13116 binop(Iop_F64toF32
,
13118 binop(Iop_I64StoF64
, mkexpr(rmode
), mkexpr(arg64
)) ) );
13120 goto decode_success
;
13122 /* F2 0F 2A = CVTSI2SD
13123 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
13124 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
13126 if (haveF2no66noF3(pfx
) && (sz
== 4 || sz
== 8)) {
13127 modrm
= getUChar(delta
);
13129 IRTemp arg32
= newTemp(Ity_I32
);
13130 if (epartIsReg(modrm
)) {
13131 assign( arg32
, getIReg32(eregOfRexRM(pfx
,modrm
)) );
13133 DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx
,modrm
)),
13134 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13136 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13137 assign( arg32
, loadLE(Ity_I32
, mkexpr(addr
)) );
13139 DIP("cvtsi2sdl %s,%s\n", dis_buf
,
13140 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
13142 putXMMRegLane64F( gregOfRexRM(pfx
,modrm
), 0,
13143 unop(Iop_I32StoF64
, mkexpr(arg32
))
13147 IRTemp arg64
= newTemp(Ity_I64
);
13148 if (epartIsReg(modrm
)) {
13149 assign( arg64
, getIReg64(eregOfRexRM(pfx
,modrm
)) );
13151 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx
,modrm
)),
13152 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13154 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13155 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
13157 DIP("cvtsi2sdq %s,%s\n", dis_buf
,
13158 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
13161 gregOfRexRM(pfx
,modrm
),
13163 binop( Iop_I64StoF64
,
13164 get_sse_roundingmode(),
13169 goto decode_success
;
13171 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
13173 if (have66noF2noF3(pfx
) && sz
== 2) {
13174 IRTemp arg64
= newTemp(Ity_I64
);
13176 modrm
= getUChar(delta
);
13177 if (epartIsReg(modrm
)) {
13178 /* Only switch to MMX mode if the source is a MMX register.
13179 This is inconsistent with all other instructions which
13180 convert between XMM and (M64 or MMX), which always switch
13181 to MMX mode even if 64-bit operand is M64 and not MMX. At
13182 least, that's what the Intel docs seem to me to say.
13185 assign( arg64
, getMMXReg(eregLO3ofRM(modrm
)) );
13187 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
13188 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13190 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13191 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
13193 DIP("cvtpi2pd %s,%s\n", dis_buf
,
13194 nameXMMReg(gregOfRexRM(pfx
,modrm
)) );
13198 gregOfRexRM(pfx
,modrm
), 0,
13199 unop(Iop_I32StoF64
, unop(Iop_64to32
, mkexpr(arg64
)) )
13203 gregOfRexRM(pfx
,modrm
), 1,
13204 unop(Iop_I32StoF64
, unop(Iop_64HIto32
, mkexpr(arg64
)) )
13207 goto decode_success
;
13212 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
13213 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
13214 if ( (haveNo66noF2noF3(pfx
) && sz
== 4)
13215 || (have66noF2noF3(pfx
) && sz
== 2) ) {
13216 modrm
= getUChar(delta
);
13217 if (!epartIsReg(modrm
)) {
13218 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13219 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
13220 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
13221 DIP("movntp%s %s,%s\n", sz
==2 ? "d" : "s",
13223 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13225 goto decode_success
;
13227 /* else fall through */
13233 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
13234 I32 in mmx, according to prevailing SSE rounding mode */
13235 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
13236 I32 in mmx, rounding towards zero */
13237 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13238 IRTemp dst64
= newTemp(Ity_I64
);
13239 IRTemp rmode
= newTemp(Ity_I32
);
13240 IRTemp f32lo
= newTemp(Ity_F32
);
13241 IRTemp f32hi
= newTemp(Ity_F32
);
13242 Bool r2zero
= toBool(opc
== 0x2C);
13245 modrm
= getUChar(delta
);
13247 if (epartIsReg(modrm
)) {
13249 assign(f32lo
, getXMMRegLane32F(eregOfRexRM(pfx
,modrm
), 0));
13250 assign(f32hi
, getXMMRegLane32F(eregOfRexRM(pfx
,modrm
), 1));
13251 DIP("cvt%sps2pi %s,%s\n", r2zero
? "t" : "",
13252 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
13253 nameMMXReg(gregLO3ofRM(modrm
)));
13255 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13256 assign(f32lo
, loadLE(Ity_F32
, mkexpr(addr
)));
13257 assign(f32hi
, loadLE(Ity_F32
, binop( Iop_Add64
,
13261 DIP("cvt%sps2pi %s,%s\n", r2zero
? "t" : "",
13263 nameMMXReg(gregLO3ofRM(modrm
)));
13267 assign(rmode
, mkU32((UInt
)Irrm_ZERO
) );
13269 assign( rmode
, get_sse_roundingmode() );
13274 binop( Iop_32HLto64
,
13275 binop( Iop_F64toI32S
,
13277 unop( Iop_F32toF64
, mkexpr(f32hi
) ) ),
13278 binop( Iop_F64toI32S
,
13280 unop( Iop_F32toF64
, mkexpr(f32lo
) ) )
13284 putMMXReg(gregLO3ofRM(modrm
), mkexpr(dst64
));
13285 goto decode_success
;
13287 /* F3 0F 2D = CVTSS2SI
13288 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
13289 according to prevailing SSE rounding mode
13290 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
13291 according to prevailing SSE rounding mode
13293 /* F3 0F 2C = CVTTSS2SI
13294 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
13295 truncating towards zero
13296 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
13297 truncating towards zero
13299 if (haveF3no66noF2(pfx
) && (sz
== 4 || sz
== 8)) {
13300 delta
= dis_CVTxSS2SI( vbi
, pfx
, delta
, False
/*!isAvx*/, opc
, sz
);
13301 goto decode_success
;
13303 /* F2 0F 2D = CVTSD2SI
13304 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
13305 according to prevailing SSE rounding mode
13306 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
13307 according to prevailing SSE rounding mode
13309 /* F2 0F 2C = CVTTSD2SI
13310 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
13311 truncating towards zero
13312 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
13313 truncating towards zero
13315 if (haveF2no66noF3(pfx
) && (sz
== 4 || sz
== 8)) {
13316 delta
= dis_CVTxSD2SI( vbi
, pfx
, delta
, False
/*!isAvx*/, opc
, sz
);
13317 goto decode_success
;
13319 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
13320 I32 in mmx, according to prevailing SSE rounding mode */
13321 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
13322 I32 in mmx, rounding towards zero */
13323 if (have66noF2noF3(pfx
) && sz
== 2) {
13324 IRTemp dst64
= newTemp(Ity_I64
);
13325 IRTemp rmode
= newTemp(Ity_I32
);
13326 IRTemp f64lo
= newTemp(Ity_F64
);
13327 IRTemp f64hi
= newTemp(Ity_F64
);
13328 Bool r2zero
= toBool(opc
== 0x2C);
13331 modrm
= getUChar(delta
);
13333 if (epartIsReg(modrm
)) {
13335 assign(f64lo
, getXMMRegLane64F(eregOfRexRM(pfx
,modrm
), 0));
13336 assign(f64hi
, getXMMRegLane64F(eregOfRexRM(pfx
,modrm
), 1));
13337 DIP("cvt%spd2pi %s,%s\n", r2zero
? "t" : "",
13338 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
13339 nameMMXReg(gregLO3ofRM(modrm
)));
13341 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13342 assign(f64lo
, loadLE(Ity_F64
, mkexpr(addr
)));
13343 assign(f64hi
, loadLE(Ity_F64
, binop( Iop_Add64
,
13347 DIP("cvt%spf2pi %s,%s\n", r2zero
? "t" : "",
13349 nameMMXReg(gregLO3ofRM(modrm
)));
13353 assign(rmode
, mkU32((UInt
)Irrm_ZERO
) );
13355 assign( rmode
, get_sse_roundingmode() );
13360 binop( Iop_32HLto64
,
13361 binop( Iop_F64toI32S
, mkexpr(rmode
), mkexpr(f64hi
) ),
13362 binop( Iop_F64toI32S
, mkexpr(rmode
), mkexpr(f64lo
) )
13366 putMMXReg(gregLO3ofRM(modrm
), mkexpr(dst64
));
13367 goto decode_success
;
13373 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
13374 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
13375 if (have66noF2noF3(pfx
) && sz
== 2) {
13376 delta
= dis_COMISD( vbi
, pfx
, delta
, False
/*!isAvx*/, opc
);
13377 goto decode_success
;
13379 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
13380 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
13381 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13382 delta
= dis_COMISS( vbi
, pfx
, delta
, False
/*!isAvx*/, opc
);
13383 goto decode_success
;
13388 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
13389 to 4 lowest bits of ireg(G) */
13390 if (haveNo66noF2noF3(pfx
) && (sz
== 4 || sz
== 8)
13391 && epartIsReg(getUChar(delta
))) {
13392 /* sz == 8 is a kludge to handle insns with REX.W redundantly
13393 set to 1, which has been known to happen:
13395 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d
13397 20071106: Intel docs say that REX.W isn't redundant: when
13398 present, a 64-bit register is written; when not present, only
13399 the 32-bit half is written. However, testing on a Core2
13400 machine suggests the entire 64 bit register is written
13401 irrespective of the status of REX.W. That could be because
13402 of the default rule that says "if the lower half of a 32-bit
13403 register is written, the upper half is zeroed". By using
13404 putIReg32 here we inadvertantly produce the same behaviour as
13405 the Core2, for the same reason -- putIReg32 implements said
13408 AMD docs give no indication that REX.W is even valid for this
13410 delta
= dis_MOVMSKPS_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
13411 goto decode_success
;
13413 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
13414 2 lowest bits of ireg(G) */
13415 if (have66noF2noF3(pfx
) && (sz
== 2 || sz
== 8)) {
13416 /* sz == 8 is a kludge to handle insns with REX.W redundantly
13417 set to 1, which has been known to happen:
13418 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d
13419 20071106: see further comments on MOVMSKPS implementation above.
13421 delta
= dis_MOVMSKPD_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
13422 goto decode_success
;
13427 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
13428 if (haveF3no66noF2(pfx
) && sz
== 4) {
13429 delta
= dis_SSE_E_to_G_unary_lo32( vbi
, pfx
, delta
,
13430 "sqrtss", Iop_Sqrt32F0x4
);
13431 goto decode_success
;
13433 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
13434 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13435 delta
= dis_SSE_E_to_G_unary_all( vbi
, pfx
, delta
,
13436 "sqrtps", Iop_Sqrt32Fx4
);
13437 goto decode_success
;
13439 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
13440 if (haveF2no66noF3(pfx
) && sz
== 4) {
13441 delta
= dis_SSE_E_to_G_unary_lo64( vbi
, pfx
, delta
,
13442 "sqrtsd", Iop_Sqrt64F0x2
);
13443 goto decode_success
;
13445 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
13446 if (have66noF2noF3(pfx
) && sz
== 2) {
13447 delta
= dis_SSE_E_to_G_unary_all( vbi
, pfx
, delta
,
13448 "sqrtpd", Iop_Sqrt64Fx2
);
13449 goto decode_success
;
13454 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
13455 if (haveF3no66noF2(pfx
) && sz
== 4) {
13456 delta
= dis_SSE_E_to_G_unary_lo32( vbi
, pfx
, delta
,
13457 "rsqrtss", Iop_RSqrtEst32F0x4
);
13458 goto decode_success
;
13460 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
13461 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13462 delta
= dis_SSE_E_to_G_unary_all( vbi
, pfx
, delta
,
13463 "rsqrtps", Iop_RSqrtEst32Fx4
);
13464 goto decode_success
;
13469 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
13470 if (haveF3no66noF2(pfx
) && sz
== 4) {
13471 delta
= dis_SSE_E_to_G_unary_lo32( vbi
, pfx
, delta
,
13472 "rcpss", Iop_RecipEst32F0x4
);
13473 goto decode_success
;
13475 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
13476 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13477 delta
= dis_SSE_E_to_G_unary_all( vbi
, pfx
, delta
,
13478 "rcpps", Iop_RecipEst32Fx4
);
13479 goto decode_success
;
13484 /* 0F 54 = ANDPS -- G = G and E */
13485 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13486 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "andps", Iop_AndV128
);
13487 goto decode_success
;
13489 /* 66 0F 54 = ANDPD -- G = G and E */
13490 if (have66noF2noF3(pfx
) && sz
== 2) {
13491 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "andpd", Iop_AndV128
);
13492 goto decode_success
;
13497 /* 0F 55 = ANDNPS -- G = (not G) and E */
13498 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13499 delta
= dis_SSE_E_to_G_all_invG( vbi
, pfx
, delta
, "andnps",
13501 goto decode_success
;
13503 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
13504 if (have66noF2noF3(pfx
) && sz
== 2) {
13505 delta
= dis_SSE_E_to_G_all_invG( vbi
, pfx
, delta
, "andnpd",
13507 goto decode_success
;
13512 /* 0F 56 = ORPS -- G = G and E */
13513 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13514 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "orps", Iop_OrV128
);
13515 goto decode_success
;
13517 /* 66 0F 56 = ORPD -- G = G and E */
13518 if (have66noF2noF3(pfx
) && sz
== 2) {
13519 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "orpd", Iop_OrV128
);
13520 goto decode_success
;
13525 /* 66 0F 57 = XORPD -- G = G xor E */
13526 if (have66noF2noF3(pfx
) && sz
== 2) {
13527 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "xorpd", Iop_XorV128
);
13528 goto decode_success
;
13530 /* 0F 57 = XORPS -- G = G xor E */
13531 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13532 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "xorps", Iop_XorV128
);
13533 goto decode_success
;
13538 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
13539 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13540 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "addps", Iop_Add32Fx4
);
13541 goto decode_success
;
13543 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
13544 if (haveF3no66noF2(pfx
) && sz
== 4) {
13545 delta
= dis_SSE_E_to_G_lo32( vbi
, pfx
, delta
, "addss", Iop_Add32F0x4
);
13546 goto decode_success
;
13548 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
13549 if (haveF2no66noF3(pfx
)
13550 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
13551 delta
= dis_SSE_E_to_G_lo64( vbi
, pfx
, delta
, "addsd", Iop_Add64F0x2
);
13552 goto decode_success
;
13554 /* 66 0F 58 = ADDPD -- add 32Fx4 from R/M to R */
13555 if (have66noF2noF3(pfx
)
13556 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
13557 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "addpd", Iop_Add64Fx2
);
13558 goto decode_success
;
13563 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
13564 if (haveF2no66noF3(pfx
)
13565 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
13566 delta
= dis_SSE_E_to_G_lo64( vbi
, pfx
, delta
, "mulsd", Iop_Mul64F0x2
);
13567 goto decode_success
;
13569 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
13570 if (haveF3no66noF2(pfx
) && sz
== 4) {
13571 delta
= dis_SSE_E_to_G_lo32( vbi
, pfx
, delta
, "mulss", Iop_Mul32F0x4
);
13572 goto decode_success
;
13574 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
13575 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13576 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "mulps", Iop_Mul32Fx4
);
13577 goto decode_success
;
13579 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
13580 if (have66noF2noF3(pfx
)
13581 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
13582 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "mulpd", Iop_Mul64Fx2
);
13583 goto decode_success
;
13588 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
13590 if (haveNo66noF2noF3(pfx
)
13591 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
13592 delta
= dis_CVTPS2PD_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
13593 goto decode_success
;
13595 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
13597 if (haveF3no66noF2(pfx
) && sz
== 4) {
13598 IRTemp f32lo
= newTemp(Ity_F32
);
13600 modrm
= getUChar(delta
);
13601 if (epartIsReg(modrm
)) {
13603 assign(f32lo
, getXMMRegLane32F(eregOfRexRM(pfx
,modrm
), 0));
13604 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
13605 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13607 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13608 assign(f32lo
, loadLE(Ity_F32
, mkexpr(addr
)));
13610 DIP("cvtss2sd %s,%s\n", dis_buf
,
13611 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13614 putXMMRegLane64F( gregOfRexRM(pfx
,modrm
), 0,
13615 unop( Iop_F32toF64
, mkexpr(f32lo
) ) );
13617 goto decode_success
;
13619 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
13620 low 1/4 xmm(G), according to prevailing SSE rounding mode */
13621 if (haveF2no66noF3(pfx
) && sz
== 4) {
13622 IRTemp rmode
= newTemp(Ity_I32
);
13623 IRTemp f64lo
= newTemp(Ity_F64
);
13625 modrm
= getUChar(delta
);
13626 if (epartIsReg(modrm
)) {
13628 assign(f64lo
, getXMMRegLane64F(eregOfRexRM(pfx
,modrm
), 0));
13629 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
13630 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13632 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13633 assign(f64lo
, loadLE(Ity_F64
, mkexpr(addr
)));
13635 DIP("cvtsd2ss %s,%s\n", dis_buf
,
13636 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13639 assign( rmode
, get_sse_roundingmode() );
13641 gregOfRexRM(pfx
,modrm
), 0,
13642 binop( Iop_F64toF32
, mkexpr(rmode
), mkexpr(f64lo
) )
13645 goto decode_success
;
13647 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
13648 lo half xmm(G), rounding according to prevailing SSE rounding
13649 mode, and zero upper half */
13650 /* Note, this is practically identical to CVTPD2DQ. It would have
13651 be nice to merge them together. */
13652 if (have66noF2noF3(pfx
) && sz
== 2) {
13653 delta
= dis_CVTPD2PS_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
13654 goto decode_success
;
13659 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
13660 xmm(G), rounding towards zero */
13661 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
13662 xmm(G), as per the prevailing rounding mode */
13663 if ( (have66noF2noF3(pfx
) && sz
== 2)
13664 || (haveF3no66noF2(pfx
) && sz
== 4) ) {
13665 Bool r2zero
= toBool(sz
== 4); // FIXME -- unreliable (???)
13666 delta
= dis_CVTxPS2DQ_128( vbi
, pfx
, delta
, False
/*!isAvx*/, r2zero
);
13667 goto decode_success
;
13669 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
13671 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13672 delta
= dis_CVTDQ2PS_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
13673 goto decode_success
;
13678 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
13679 if (haveF3no66noF2(pfx
) && sz
== 4) {
13680 delta
= dis_SSE_E_to_G_lo32( vbi
, pfx
, delta
, "subss", Iop_Sub32F0x4
);
13681 goto decode_success
;
13683 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
13684 if (haveF2no66noF3(pfx
)
13685 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
13686 delta
= dis_SSE_E_to_G_lo64( vbi
, pfx
, delta
, "subsd", Iop_Sub64F0x2
);
13687 goto decode_success
;
13689 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
13690 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13691 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "subps", Iop_Sub32Fx4
);
13692 goto decode_success
;
13694 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
13695 if (have66noF2noF3(pfx
) && sz
== 2) {
13696 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "subpd", Iop_Sub64Fx2
);
13697 goto decode_success
;
13702 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
13703 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13704 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "minps", Iop_Min32Fx4
);
13705 goto decode_success
;
13707 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
13708 if (haveF3no66noF2(pfx
) && sz
== 4) {
13709 delta
= dis_SSE_E_to_G_lo32( vbi
, pfx
, delta
, "minss", Iop_Min32F0x4
);
13710 goto decode_success
;
13712 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
13713 if (haveF2no66noF3(pfx
)
13714 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
13715 delta
= dis_SSE_E_to_G_lo64( vbi
, pfx
, delta
, "minsd", Iop_Min64F0x2
);
13716 goto decode_success
;
13718 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
13719 if (have66noF2noF3(pfx
) && sz
== 2) {
13720 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "minpd", Iop_Min64Fx2
);
13721 goto decode_success
;
13726 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
13727 if (haveF2no66noF3(pfx
) && sz
== 4) {
13728 delta
= dis_SSE_E_to_G_lo64( vbi
, pfx
, delta
, "divsd", Iop_Div64F0x2
);
13729 goto decode_success
;
13731 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
13732 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13733 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "divps", Iop_Div32Fx4
);
13734 goto decode_success
;
13736 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
13737 if (haveF3no66noF2(pfx
) && sz
== 4) {
13738 delta
= dis_SSE_E_to_G_lo32( vbi
, pfx
, delta
, "divss", Iop_Div32F0x4
);
13739 goto decode_success
;
13741 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
13742 if (have66noF2noF3(pfx
) && sz
== 2) {
13743 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "divpd", Iop_Div64Fx2
);
13744 goto decode_success
;
13749 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
13750 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
13751 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "maxps", Iop_Max32Fx4
);
13752 goto decode_success
;
13754 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
13755 if (haveF3no66noF2(pfx
) && sz
== 4) {
13756 delta
= dis_SSE_E_to_G_lo32( vbi
, pfx
, delta
, "maxss", Iop_Max32F0x4
);
13757 goto decode_success
;
13759 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
13760 if (haveF2no66noF3(pfx
)
13761 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
13762 delta
= dis_SSE_E_to_G_lo64( vbi
, pfx
, delta
, "maxsd", Iop_Max64F0x2
);
13763 goto decode_success
;
13765 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
13766 if (have66noF2noF3(pfx
) && sz
== 2) {
13767 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "maxpd", Iop_Max64Fx2
);
13768 goto decode_success
;
13773 /* 66 0F 60 = PUNPCKLBW */
13774 if (have66noF2noF3(pfx
) && sz
== 2) {
13775 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13777 Iop_InterleaveLO8x16
, True
);
13778 goto decode_success
;
13783 /* 66 0F 61 = PUNPCKLWD */
13784 if (have66noF2noF3(pfx
) && sz
== 2) {
13785 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13787 Iop_InterleaveLO16x8
, True
);
13788 goto decode_success
;
13793 /* 66 0F 62 = PUNPCKLDQ */
13794 if (have66noF2noF3(pfx
) && sz
== 2) {
13795 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13797 Iop_InterleaveLO32x4
, True
);
13798 goto decode_success
;
13803 /* 66 0F 63 = PACKSSWB */
13804 if (have66noF2noF3(pfx
) && sz
== 2) {
13805 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13807 Iop_QNarrowBin16Sto8Sx16
, True
);
13808 goto decode_success
;
13813 /* 66 0F 64 = PCMPGTB */
13814 if (have66noF2noF3(pfx
) && sz
== 2) {
13815 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13816 "pcmpgtb", Iop_CmpGT8Sx16
, False
);
13817 goto decode_success
;
13822 /* 66 0F 65 = PCMPGTW */
13823 if (have66noF2noF3(pfx
) && sz
== 2) {
13824 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13825 "pcmpgtw", Iop_CmpGT16Sx8
, False
);
13826 goto decode_success
;
13831 /* 66 0F 66 = PCMPGTD */
13832 if (have66noF2noF3(pfx
) && sz
== 2) {
13833 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13834 "pcmpgtd", Iop_CmpGT32Sx4
, False
);
13835 goto decode_success
;
13840 /* 66 0F 67 = PACKUSWB */
13841 if (have66noF2noF3(pfx
) && sz
== 2) {
13842 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13844 Iop_QNarrowBin16Sto8Ux16
, True
);
13845 goto decode_success
;
13850 /* 66 0F 68 = PUNPCKHBW */
13851 if (have66noF2noF3(pfx
) && sz
== 2) {
13852 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13854 Iop_InterleaveHI8x16
, True
);
13855 goto decode_success
;
13860 /* 66 0F 69 = PUNPCKHWD */
13861 if (have66noF2noF3(pfx
) && sz
== 2) {
13862 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13864 Iop_InterleaveHI16x8
, True
);
13865 goto decode_success
;
13870 /* 66 0F 6A = PUNPCKHDQ */
13871 if (have66noF2noF3(pfx
) && sz
== 2) {
13872 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13874 Iop_InterleaveHI32x4
, True
);
13875 goto decode_success
;
13880 /* 66 0F 6B = PACKSSDW */
13881 if (have66noF2noF3(pfx
) && sz
== 2) {
13882 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13884 Iop_QNarrowBin32Sto16Sx8
, True
);
13885 goto decode_success
;
13890 /* 66 0F 6C = PUNPCKLQDQ */
13891 if (have66noF2noF3(pfx
) && sz
== 2) {
13892 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13894 Iop_InterleaveLO64x2
, True
);
13895 goto decode_success
;
13900 /* 66 0F 6D = PUNPCKHQDQ */
13901 if (have66noF2noF3(pfx
) && sz
== 2) {
13902 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
13904 Iop_InterleaveHI64x2
, True
);
13905 goto decode_success
;
13910 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4,
13911 zeroing high 3/4 of xmm. */
13912 /* or from ireg64/m64 to xmm lo 1/2,
13913 zeroing high 1/2 of xmm. */
13914 if (have66noF2noF3(pfx
)) {
13915 vassert(sz
== 2 || sz
== 8);
13916 if (sz
== 2) sz
= 4;
13917 modrm
= getUChar(delta
);
13918 if (epartIsReg(modrm
)) {
13922 gregOfRexRM(pfx
,modrm
),
13923 unop( Iop_32UtoV128
, getIReg32(eregOfRexRM(pfx
,modrm
)) )
13925 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx
,modrm
)),
13926 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13929 gregOfRexRM(pfx
,modrm
),
13930 unop( Iop_64UtoV128
, getIReg64(eregOfRexRM(pfx
,modrm
)) )
13932 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx
,modrm
)),
13933 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13936 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13939 gregOfRexRM(pfx
,modrm
),
13941 ? unop( Iop_32UtoV128
,loadLE(Ity_I32
, mkexpr(addr
)) )
13942 : unop( Iop_64UtoV128
,loadLE(Ity_I64
, mkexpr(addr
)) )
13944 DIP("mov%c %s, %s\n", sz
== 4 ? 'd' : 'q', dis_buf
,
13945 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13947 goto decode_success
;
13952 if (have66noF2noF3(pfx
)
13953 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
13954 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
13955 modrm
= getUChar(delta
);
13956 if (epartIsReg(modrm
)) {
13957 putXMMReg( gregOfRexRM(pfx
,modrm
),
13958 getXMMReg( eregOfRexRM(pfx
,modrm
) ));
13959 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
13960 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13963 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13964 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
13965 putXMMReg( gregOfRexRM(pfx
,modrm
),
13966 loadLE(Ity_V128
, mkexpr(addr
)) );
13967 DIP("movdqa %s,%s\n", dis_buf
,
13968 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13971 goto decode_success
;
13973 if (haveF3no66noF2(pfx
)
13974 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
13975 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
13976 modrm
= getUChar(delta
);
13977 if (epartIsReg(modrm
)) {
13978 putXMMReg( gregOfRexRM(pfx
,modrm
),
13979 getXMMReg( eregOfRexRM(pfx
,modrm
) ));
13980 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
13981 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13984 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
13985 putXMMReg( gregOfRexRM(pfx
,modrm
),
13986 loadLE(Ity_V128
, mkexpr(addr
)) );
13987 DIP("movdqu %s,%s\n", dis_buf
,
13988 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
13991 goto decode_success
;
13996 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
13997 if (have66noF2noF3(pfx
) && sz
== 2) {
13998 delta
= dis_PSHUFD_32x4( vbi
, pfx
, delta
, False
/*!writesYmm*/);
13999 goto decode_success
;
14001 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14002 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
14003 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14005 IRTemp sV
, dV
, s3
, s2
, s1
, s0
;
14006 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
14007 sV
= newTemp(Ity_I64
);
14008 dV
= newTemp(Ity_I64
);
14010 modrm
= getUChar(delta
);
14011 if (epartIsReg(modrm
)) {
14012 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
14013 order
= (Int
)getUChar(delta
+1);
14015 DIP("pshufw $%d,%s,%s\n", order
,
14016 nameMMXReg(eregLO3ofRM(modrm
)),
14017 nameMMXReg(gregLO3ofRM(modrm
)));
14019 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
,
14020 1/*extra byte after amode*/ );
14021 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
14022 order
= (Int
)getUChar(delta
+alen
);
14024 DIP("pshufw $%d,%s,%s\n", order
,
14026 nameMMXReg(gregLO3ofRM(modrm
)));
14028 breakup64to16s( sV
, &s3
, &s2
, &s1
, &s0
);
14030 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
14032 mk64from16s( SEL((order
>>6)&3), SEL((order
>>4)&3),
14033 SEL((order
>>2)&3), SEL((order
>>0)&3) )
14035 putMMXReg(gregLO3ofRM(modrm
), mkexpr(dV
));
14037 goto decode_success
;
14039 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
14040 mem) to G(xmm), and copy upper half */
14041 if (haveF2no66noF3(pfx
) && sz
== 4) {
14042 delta
= dis_PSHUFxW_128( vbi
, pfx
, delta
,
14043 False
/*!isAvx*/, False
/*!xIsH*/ );
14044 goto decode_success
;
14046 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
14047 mem) to G(xmm), and copy lower half */
14048 if (haveF3no66noF2(pfx
) && sz
== 4) {
14049 delta
= dis_PSHUFxW_128( vbi
, pfx
, delta
,
14050 False
/*!isAvx*/, True
/*xIsH*/ );
14051 goto decode_success
;
14056 /* 66 0F 71 /2 ib = PSRLW by immediate */
14057 if (have66noF2noF3(pfx
) && sz
== 2
14058 && epartIsReg(getUChar(delta
))
14059 && gregLO3ofRM(getUChar(delta
)) == 2) {
14060 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "psrlw", Iop_ShrN16x8
);
14061 goto decode_success
;
14063 /* 66 0F 71 /4 ib = PSRAW by immediate */
14064 if (have66noF2noF3(pfx
) && sz
== 2
14065 && epartIsReg(getUChar(delta
))
14066 && gregLO3ofRM(getUChar(delta
)) == 4) {
14067 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "psraw", Iop_SarN16x8
);
14068 goto decode_success
;
14070 /* 66 0F 71 /6 ib = PSLLW by immediate */
14071 if (have66noF2noF3(pfx
) && sz
== 2
14072 && epartIsReg(getUChar(delta
))
14073 && gregLO3ofRM(getUChar(delta
)) == 6) {
14074 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "psllw", Iop_ShlN16x8
);
14075 goto decode_success
;
14080 /* 66 0F 72 /2 ib = PSRLD by immediate */
14081 if (have66noF2noF3(pfx
) && sz
== 2
14082 && epartIsReg(getUChar(delta
))
14083 && gregLO3ofRM(getUChar(delta
)) == 2) {
14084 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "psrld", Iop_ShrN32x4
);
14085 goto decode_success
;
14087 /* 66 0F 72 /4 ib = PSRAD by immediate */
14088 if (have66noF2noF3(pfx
) && sz
== 2
14089 && epartIsReg(getUChar(delta
))
14090 && gregLO3ofRM(getUChar(delta
)) == 4) {
14091 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "psrad", Iop_SarN32x4
);
14092 goto decode_success
;
14094 /* 66 0F 72 /6 ib = PSLLD by immediate */
14095 if (have66noF2noF3(pfx
) && sz
== 2
14096 && epartIsReg(getUChar(delta
))
14097 && gregLO3ofRM(getUChar(delta
)) == 6) {
14098 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "pslld", Iop_ShlN32x4
);
14099 goto decode_success
;
14104 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
14105 /* note, if mem case ever filled in, 1 byte after amode */
14106 if (have66noF2noF3(pfx
) && sz
== 2
14107 && epartIsReg(getUChar(delta
))
14108 && gregLO3ofRM(getUChar(delta
)) == 3) {
14109 Int imm
= (Int
)getUChar(delta
+1);
14110 Int reg
= eregOfRexRM(pfx
,getUChar(delta
));
14111 DIP("psrldq $%d,%s\n", imm
, nameXMMReg(reg
));
14113 IRTemp sV
= newTemp(Ity_V128
);
14114 assign( sV
, getXMMReg(reg
) );
14115 putXMMReg(reg
, mkexpr(math_PSRLDQ( sV
, imm
)));
14116 goto decode_success
;
14118 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
14119 /* note, if mem case ever filled in, 1 byte after amode */
14120 if (have66noF2noF3(pfx
) && sz
== 2
14121 && epartIsReg(getUChar(delta
))
14122 && gregLO3ofRM(getUChar(delta
)) == 7) {
14123 Int imm
= (Int
)getUChar(delta
+1);
14124 Int reg
= eregOfRexRM(pfx
,getUChar(delta
));
14125 DIP("pslldq $%d,%s\n", imm
, nameXMMReg(reg
));
14126 vassert(imm
>= 0 && imm
<= 255);
14128 IRTemp sV
= newTemp(Ity_V128
);
14129 assign( sV
, getXMMReg(reg
) );
14130 putXMMReg(reg
, mkexpr(math_PSLLDQ( sV
, imm
)));
14131 goto decode_success
;
14133 /* 66 0F 73 /2 ib = PSRLQ by immediate */
14134 if (have66noF2noF3(pfx
) && sz
== 2
14135 && epartIsReg(getUChar(delta
))
14136 && gregLO3ofRM(getUChar(delta
)) == 2) {
14137 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "psrlq", Iop_ShrN64x2
);
14138 goto decode_success
;
14140 /* 66 0F 73 /6 ib = PSLLQ by immediate */
14141 if (have66noF2noF3(pfx
) && sz
== 2
14142 && epartIsReg(getUChar(delta
))
14143 && gregLO3ofRM(getUChar(delta
)) == 6) {
14144 delta
= dis_SSE_shiftE_imm( pfx
, delta
, "psllq", Iop_ShlN64x2
);
14145 goto decode_success
;
14150 /* 66 0F 74 = PCMPEQB */
14151 if (have66noF2noF3(pfx
) && sz
== 2) {
14152 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14153 "pcmpeqb", Iop_CmpEQ8x16
, False
);
14154 goto decode_success
;
14159 /* 66 0F 75 = PCMPEQW */
14160 if (have66noF2noF3(pfx
) && sz
== 2) {
14161 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14162 "pcmpeqw", Iop_CmpEQ16x8
, False
);
14163 goto decode_success
;
14168 /* 66 0F 76 = PCMPEQD */
14169 if (have66noF2noF3(pfx
) && sz
== 2) {
14170 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14171 "pcmpeqd", Iop_CmpEQ32x4
, False
);
14172 goto decode_success
;
14177 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
14178 G (lo half xmm). Upper half of G is zeroed out. */
14179 if (haveF3no66noF2(pfx
)
14180 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
14181 modrm
= getUChar(delta
);
14182 if (epartIsReg(modrm
)) {
14183 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0,
14184 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 0 ));
14185 /* zero bits 127:64 */
14186 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 1, mkU64(0) );
14187 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
14188 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
14191 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14192 putXMMReg( gregOfRexRM(pfx
,modrm
), mkV128(0) );
14193 putXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0,
14194 loadLE(Ity_I64
, mkexpr(addr
)) );
14195 DIP("movsd %s,%s\n", dis_buf
,
14196 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
14199 goto decode_success
;
14201 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
14202 /* or from xmm low 1/2 to ireg64 or m64. */
14203 if (have66noF2noF3(pfx
) && (sz
== 2 || sz
== 8)) {
14204 if (sz
== 2) sz
= 4;
14205 modrm
= getUChar(delta
);
14206 if (epartIsReg(modrm
)) {
14209 putIReg32( eregOfRexRM(pfx
,modrm
),
14210 getXMMRegLane32(gregOfRexRM(pfx
,modrm
), 0) );
14211 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
14212 nameIReg32(eregOfRexRM(pfx
,modrm
)));
14214 putIReg64( eregOfRexRM(pfx
,modrm
),
14215 getXMMRegLane64(gregOfRexRM(pfx
,modrm
), 0) );
14216 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
14217 nameIReg64(eregOfRexRM(pfx
,modrm
)));
14220 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14222 storeLE( mkexpr(addr
),
14224 ? getXMMRegLane32(gregOfRexRM(pfx
,modrm
),0)
14225 : getXMMRegLane64(gregOfRexRM(pfx
,modrm
),0) );
14226 DIP("mov%c %s, %s\n", sz
== 4 ? 'd' : 'q',
14227 nameXMMReg(gregOfRexRM(pfx
,modrm
)), dis_buf
);
14229 goto decode_success
;
14234 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
14235 if (haveF3no66noF2(pfx
)
14236 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
14237 modrm
= getUChar(delta
);
14238 if (epartIsReg(modrm
)) {
14239 goto decode_failure
; /* awaiting test case */
14241 putXMMReg( eregOfRexRM(pfx
,modrm
),
14242 getXMMReg(gregOfRexRM(pfx
,modrm
)) );
14243 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
14244 nameXMMReg(eregOfRexRM(pfx
,modrm
)));
14246 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14248 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
14249 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)), dis_buf
);
14251 goto decode_success
;
14253 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
14254 if (have66noF2noF3(pfx
) && sz
== 2) {
14255 modrm
= getUChar(delta
);
14256 if (epartIsReg(modrm
)) {
14258 putXMMReg( eregOfRexRM(pfx
,modrm
),
14259 getXMMReg(gregOfRexRM(pfx
,modrm
)) );
14260 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)),
14261 nameXMMReg(eregOfRexRM(pfx
,modrm
)));
14263 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14264 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
14266 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
14267 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)), dis_buf
);
14269 goto decode_success
;
14274 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
14275 if (haveNo66noF2noF3(pfx
)
14276 && epartIsReg(getUChar(delta
)) && gregLO3ofRM(getUChar(delta
)) == 7
14279 /* Insert a memory fence. It's sometimes important that these
14280 are carried through to the generated code. */
14281 stmt( IRStmt_MBE(Imbe_Fence
) );
14283 goto decode_success
;
14285 /* mindless duplication follows .. */
14286 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
14287 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
14288 if (haveNo66noF2noF3(pfx
)
14289 && epartIsReg(getUChar(delta
))
14290 && (gregLO3ofRM(getUChar(delta
)) == 5
14291 || gregLO3ofRM(getUChar(delta
)) == 6)
14294 /* Insert a memory fence. It's sometimes important that these
14295 are carried through to the generated code. */
14296 stmt( IRStmt_MBE(Imbe_Fence
) );
14297 DIP("%sfence\n", gregLO3ofRM(getUChar(delta
-1))==5 ? "l" : "m");
14298 goto decode_success
;
14301 /* 0F AE /7 = CLFLUSH -- flush cache line */
14302 if (haveNo66noF2noF3(pfx
)
14303 && !epartIsReg(getUChar(delta
)) && gregLO3ofRM(getUChar(delta
)) == 7
14306 /* This is something of a hack. We need to know the size of
14307 the cache line containing addr. Since we don't (easily),
14308 assume 256 on the basis that no real cache would have a
14309 line that big. It's safe to invalidate more stuff than we
14310 need, just inefficient. */
14311 ULong lineszB
= 256ULL;
14313 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14316 /* Round addr down to the start of the containing block. */
14321 mkU64( ~(lineszB
-1) ))) );
14323 stmt( IRStmt_Put(OFFB_CMLEN
, mkU64(lineszB
) ) );
14325 jmp_lit(dres
, Ijk_InvalICache
, (Addr64
)(guest_RIP_bbstart
+delta
));
14327 DIP("clflush %s\n", dis_buf
);
14328 goto decode_success
;
14331 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
14332 if (haveNo66noF2noF3(pfx
)
14333 && !epartIsReg(getUChar(delta
)) && gregLO3ofRM(getUChar(delta
)) == 3
14335 delta
= dis_STMXCSR(vbi
, pfx
, delta
, False
/*!isAvx*/);
14336 goto decode_success
;
14338 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
14339 if (haveNo66noF2noF3(pfx
)
14340 && !epartIsReg(getUChar(delta
)) && gregLO3ofRM(getUChar(delta
)) == 2
14342 delta
= dis_LDMXCSR(vbi
, pfx
, delta
, False
/*!isAvx*/);
14343 goto decode_success
;
14345 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
14346 if (haveNo66noF2noF3(pfx
) && (sz
== 4 || sz
== 8)
14347 && !epartIsReg(getUChar(delta
))
14348 && gregOfRexRM(pfx
,getUChar(delta
)) == 0) {
14349 delta
= dis_FXSAVE(vbi
, pfx
, delta
, sz
);
14350 goto decode_success
;
14352 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
14353 if (haveNo66noF2noF3(pfx
) && (sz
== 4 || sz
== 8)
14354 && !epartIsReg(getUChar(delta
))
14355 && gregOfRexRM(pfx
,getUChar(delta
)) == 1) {
14356 delta
= dis_FXRSTOR(vbi
, pfx
, delta
, sz
);
14357 goto decode_success
;
14359 /* 0F AE /4 = XSAVE mem -- write x87, SSE, AVX state to memory */
14360 if (haveNo66noF2noF3(pfx
) && (sz
== 4 || sz
== 8)
14361 && !epartIsReg(getUChar(delta
))
14362 && gregOfRexRM(pfx
,getUChar(delta
)) == 4
14363 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
14364 delta
= dis_XSAVE(vbi
, pfx
, delta
, sz
);
14365 goto decode_success
;
14367 /* 0F AE /5 = XRSTOR mem -- read x87, SSE, AVX state from memory */
14368 if (haveNo66noF2noF3(pfx
) && (sz
== 4 || sz
== 8)
14369 && !epartIsReg(getUChar(delta
))
14370 && gregOfRexRM(pfx
,getUChar(delta
)) == 5
14371 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
14372 delta
= dis_XRSTOR(vbi
, pfx
, delta
, sz
);
14373 goto decode_success
;
14378 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
14379 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14380 Long delta0
= delta
;
14381 delta
= dis_SSE_cmp_E_to_G( vbi
, pfx
, delta
, "cmpps", True
, 4 );
14382 if (delta
> delta0
) goto decode_success
;
14384 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
14385 if (haveF3no66noF2(pfx
) && sz
== 4) {
14386 Long delta0
= delta
;
14387 delta
= dis_SSE_cmp_E_to_G( vbi
, pfx
, delta
, "cmpss", False
, 4 );
14388 if (delta
> delta0
) goto decode_success
;
14390 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
14391 if (haveF2no66noF3(pfx
) && sz
== 4) {
14392 Long delta0
= delta
;
14393 delta
= dis_SSE_cmp_E_to_G( vbi
, pfx
, delta
, "cmpsd", False
, 8 );
14394 if (delta
> delta0
) goto decode_success
;
14396 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
14397 if (have66noF2noF3(pfx
) && sz
== 2) {
14398 Long delta0
= delta
;
14399 delta
= dis_SSE_cmp_E_to_G( vbi
, pfx
, delta
, "cmppd", True
, 8 );
14400 if (delta
> delta0
) goto decode_success
;
14405 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
14406 if (haveNo66noF2noF3(pfx
) && (sz
== 4 || sz
== 8)) {
14407 modrm
= getUChar(delta
);
14408 if (!epartIsReg(modrm
)) {
14409 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14410 storeLE( mkexpr(addr
), getIRegG(sz
, pfx
, modrm
) );
14411 DIP("movnti %s,%s\n", dis_buf
,
14412 nameIRegG(sz
, pfx
, modrm
));
14414 goto decode_success
;
14416 /* else fall through */
14421 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14422 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
14423 put it into the specified lane of mmx(G). */
14424 if (haveNo66noF2noF3(pfx
)
14425 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
14426 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
14427 mmx reg. t4 is the new lane value. t5 is the original
14428 mmx value. t6 is the new mmx value. */
14430 t4
= newTemp(Ity_I16
);
14431 t5
= newTemp(Ity_I64
);
14432 t6
= newTemp(Ity_I64
);
14433 modrm
= getUChar(delta
);
14436 assign(t5
, getMMXReg(gregLO3ofRM(modrm
)));
14437 breakup64to16s( t5
, &t3
, &t2
, &t1
, &t0
);
14439 if (epartIsReg(modrm
)) {
14440 assign(t4
, getIReg16(eregOfRexRM(pfx
,modrm
)));
14442 lane
= getUChar(delta
-1);
14443 DIP("pinsrw $%d,%s,%s\n", lane
,
14444 nameIReg16(eregOfRexRM(pfx
,modrm
)),
14445 nameMMXReg(gregLO3ofRM(modrm
)));
14447 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
14449 lane
= getUChar(delta
-1);
14450 assign(t4
, loadLE(Ity_I16
, mkexpr(addr
)));
14451 DIP("pinsrw $%d,%s,%s\n", lane
,
14453 nameMMXReg(gregLO3ofRM(modrm
)));
14456 switch (lane
& 3) {
14457 case 0: assign(t6
, mk64from16s(t3
,t2
,t1
,t4
)); break;
14458 case 1: assign(t6
, mk64from16s(t3
,t2
,t4
,t0
)); break;
14459 case 2: assign(t6
, mk64from16s(t3
,t4
,t1
,t0
)); break;
14460 case 3: assign(t6
, mk64from16s(t4
,t2
,t1
,t0
)); break;
14461 default: vassert(0);
14463 putMMXReg(gregLO3ofRM(modrm
), mkexpr(t6
));
14464 goto decode_success
;
14466 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
14467 put it into the specified lane of xmm(G). */
14468 if (have66noF2noF3(pfx
)
14469 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
14471 t4
= newTemp(Ity_I16
);
14472 modrm
= getUChar(delta
);
14473 UInt rG
= gregOfRexRM(pfx
,modrm
);
14474 if (epartIsReg(modrm
)) {
14475 UInt rE
= eregOfRexRM(pfx
,modrm
);
14476 assign(t4
, getIReg16(rE
));
14478 lane
= getUChar(delta
-1);
14479 DIP("pinsrw $%d,%s,%s\n",
14480 lane
, nameIReg16(rE
), nameXMMReg(rG
));
14482 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
,
14483 1/*byte after the amode*/ );
14485 lane
= getUChar(delta
-1);
14486 assign(t4
, loadLE(Ity_I16
, mkexpr(addr
)));
14487 DIP("pinsrw $%d,%s,%s\n",
14488 lane
, dis_buf
, nameXMMReg(rG
));
14490 IRTemp src_vec
= newTemp(Ity_V128
);
14491 assign(src_vec
, getXMMReg(rG
));
14492 IRTemp res_vec
= math_PINSRW_128( src_vec
, t4
, lane
& 7);
14493 putXMMReg(rG
, mkexpr(res_vec
));
14494 goto decode_success
;
14499 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14500 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
14501 zero-extend of it in ireg(G). */
14502 if (haveNo66noF2noF3(pfx
) && (sz
== 4 || sz
== 8)) {
14503 modrm
= getUChar(delta
);
14504 if (epartIsReg(modrm
)) {
14505 IRTemp sV
= newTemp(Ity_I64
);
14506 t5
= newTemp(Ity_I16
);
14508 assign(sV
, getMMXReg(eregLO3ofRM(modrm
)));
14509 breakup64to16s( sV
, &t3
, &t2
, &t1
, &t0
);
14510 switch (getUChar(delta
+1) & 3) {
14511 case 0: assign(t5
, mkexpr(t0
)); break;
14512 case 1: assign(t5
, mkexpr(t1
)); break;
14513 case 2: assign(t5
, mkexpr(t2
)); break;
14514 case 3: assign(t5
, mkexpr(t3
)); break;
14515 default: vassert(0);
14518 putIReg64(gregOfRexRM(pfx
,modrm
), unop(Iop_16Uto64
, mkexpr(t5
)));
14520 putIReg32(gregOfRexRM(pfx
,modrm
), unop(Iop_16Uto32
, mkexpr(t5
)));
14521 DIP("pextrw $%d,%s,%s\n",
14522 (Int
)getUChar(delta
+1),
14523 nameMMXReg(eregLO3ofRM(modrm
)),
14524 sz
==8 ? nameIReg64(gregOfRexRM(pfx
,modrm
))
14525 : nameIReg32(gregOfRexRM(pfx
,modrm
))
14528 goto decode_success
;
14530 /* else fall through */
14531 /* note, for anyone filling in the mem case: this insn has one
14532 byte after the amode and therefore you must pass 1 as the
14533 last arg to disAMode */
14535 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
14536 zero-extend of it in ireg(G). */
14537 if (have66noF2noF3(pfx
)
14538 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
14539 Long delta0
= delta
;
14540 delta
= dis_PEXTRW_128_EregOnly_toG( vbi
, pfx
, delta
,
14542 if (delta
> delta0
) goto decode_success
;
14543 /* else fall through -- decoding has failed */
14548 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
14549 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14551 IRTemp sV
= newTemp(Ity_V128
);
14552 IRTemp dV
= newTemp(Ity_V128
);
14553 modrm
= getUChar(delta
);
14554 UInt rG
= gregOfRexRM(pfx
,modrm
);
14555 assign( dV
, getXMMReg(rG
) );
14556 if (epartIsReg(modrm
)) {
14557 UInt rE
= eregOfRexRM(pfx
,modrm
);
14558 assign( sV
, getXMMReg(rE
) );
14559 imm8
= (Int
)getUChar(delta
+1);
14561 DIP("shufps $%d,%s,%s\n", imm8
, nameXMMReg(rE
), nameXMMReg(rG
));
14563 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
14564 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
14565 imm8
= (Int
)getUChar(delta
+alen
);
14567 DIP("shufps $%d,%s,%s\n", imm8
, dis_buf
, nameXMMReg(rG
));
14569 IRTemp res
= math_SHUFPS_128( sV
, dV
, imm8
);
14570 putXMMReg( gregOfRexRM(pfx
,modrm
), mkexpr(res
) );
14571 goto decode_success
;
14573 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
14574 if (have66noF2noF3(pfx
) && sz
== 2) {
14576 IRTemp sV
= newTemp(Ity_V128
);
14577 IRTemp dV
= newTemp(Ity_V128
);
14579 modrm
= getUChar(delta
);
14580 assign( dV
, getXMMReg(gregOfRexRM(pfx
,modrm
)) );
14582 if (epartIsReg(modrm
)) {
14583 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
14584 select
= (Int
)getUChar(delta
+1);
14586 DIP("shufpd $%d,%s,%s\n", select
,
14587 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
14588 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
14590 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
14591 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
14592 select
= getUChar(delta
+alen
);
14594 DIP("shufpd $%d,%s,%s\n", select
,
14596 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
14599 IRTemp res
= math_SHUFPD_128( sV
, dV
, select
);
14600 putXMMReg( gregOfRexRM(pfx
,modrm
), mkexpr(res
) );
14601 goto decode_success
;
14606 /* 66 0F D1 = PSRLW by E */
14607 if (have66noF2noF3(pfx
) && sz
== 2) {
14608 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "psrlw", Iop_ShrN16x8
);
14609 goto decode_success
;
14614 /* 66 0F D2 = PSRLD by E */
14615 if (have66noF2noF3(pfx
) && sz
== 2) {
14616 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "psrld", Iop_ShrN32x4
);
14617 goto decode_success
;
14622 /* 66 0F D3 = PSRLQ by E */
14623 if (have66noF2noF3(pfx
) && sz
== 2) {
14624 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "psrlq", Iop_ShrN64x2
);
14625 goto decode_success
;
14630 /* 66 0F D4 = PADDQ */
14631 if (have66noF2noF3(pfx
) && sz
== 2) {
14632 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14633 "paddq", Iop_Add64x2
, False
);
14634 goto decode_success
;
14636 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
14637 /* 0F D4 = PADDQ -- add 64x1 */
14638 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14640 delta
= dis_MMXop_regmem_to_reg (
14641 vbi
, pfx
, delta
, opc
, "paddq", False
);
14642 goto decode_success
;
14647 /* 66 0F D5 = PMULLW -- 16x8 multiply */
14648 if (have66noF2noF3(pfx
) && sz
== 2) {
14649 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14650 "pmullw", Iop_Mul16x8
, False
);
14651 goto decode_success
;
14656 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
14658 if (haveF3no66noF2(pfx
) && sz
== 4) {
14659 modrm
= getUChar(delta
);
14660 if (epartIsReg(modrm
)) {
14662 putXMMReg( gregOfRexRM(pfx
,modrm
),
14663 unop(Iop_64UtoV128
, getMMXReg( eregLO3ofRM(modrm
) )) );
14664 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
14665 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
14667 goto decode_success
;
14669 /* apparently no mem case for this insn */
14671 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
14672 or lo half xmm). */
14673 if (have66noF2noF3(pfx
)
14674 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
14675 modrm
= getUChar(delta
);
14676 if (epartIsReg(modrm
)) {
14677 /* fall through, awaiting test case */
14678 /* dst: lo half copied, hi half zeroed */
14680 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14681 storeLE( mkexpr(addr
),
14682 getXMMRegLane64( gregOfRexRM(pfx
,modrm
), 0 ));
14683 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx
,modrm
)), dis_buf
);
14685 goto decode_success
;
14688 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
14689 if (haveF2no66noF3(pfx
) && sz
== 4) {
14690 modrm
= getUChar(delta
);
14691 if (epartIsReg(modrm
)) {
14693 putMMXReg( gregLO3ofRM(modrm
),
14694 getXMMRegLane64( eregOfRexRM(pfx
,modrm
), 0 ));
14695 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
14696 nameMMXReg(gregLO3ofRM(modrm
)));
14698 goto decode_success
;
14700 /* apparently no mem case for this insn */
14705 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16
14706 lanes in xmm(E), turn them into a byte, and put
14707 zero-extend of it in ireg(G). Doing this directly is just
14708 too cumbersome; give up therefore and call a helper. */
14709 if (have66noF2noF3(pfx
)
14710 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)
14711 && epartIsReg(getUChar(delta
))) { /* no memory case, it seems */
14712 delta
= dis_PMOVMSKB_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
14713 goto decode_success
;
14715 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14716 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
14717 mmx(E), turn them into a byte, and put zero-extend of it in
14719 if (haveNo66noF2noF3(pfx
)
14720 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
14721 modrm
= getUChar(delta
);
14722 if (epartIsReg(modrm
)) {
14724 t0
= newTemp(Ity_I64
);
14725 t1
= newTemp(Ity_I32
);
14726 assign(t0
, getMMXReg(eregLO3ofRM(modrm
)));
14727 assign(t1
, unop(Iop_8Uto32
, unop(Iop_GetMSBs8x8
, mkexpr(t0
))));
14728 putIReg32(gregOfRexRM(pfx
,modrm
), mkexpr(t1
));
14729 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
14730 nameIReg32(gregOfRexRM(pfx
,modrm
)));
14732 goto decode_success
;
14734 /* else fall through */
14739 /* 66 0F D8 = PSUBUSB */
14740 if (have66noF2noF3(pfx
) && sz
== 2) {
14741 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14742 "psubusb", Iop_QSub8Ux16
, False
);
14743 goto decode_success
;
14748 /* 66 0F D9 = PSUBUSW */
14749 if (have66noF2noF3(pfx
) && sz
== 2) {
14750 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14751 "psubusw", Iop_QSub16Ux8
, False
);
14752 goto decode_success
;
14757 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14758 /* 0F DA = PMINUB -- 8x8 unsigned min */
14759 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14761 delta
= dis_MMXop_regmem_to_reg (
14762 vbi
, pfx
, delta
, opc
, "pminub", False
);
14763 goto decode_success
;
14765 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
14766 if (have66noF2noF3(pfx
) && sz
== 2) {
14767 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14768 "pminub", Iop_Min8Ux16
, False
);
14769 goto decode_success
;
14774 /* 66 0F DB = PAND */
14775 if (have66noF2noF3(pfx
) && sz
== 2) {
14776 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "pand", Iop_AndV128
);
14777 goto decode_success
;
14782 /* 66 0F DC = PADDUSB */
14783 if (have66noF2noF3(pfx
) && sz
== 2) {
14784 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14785 "paddusb", Iop_QAdd8Ux16
, False
);
14786 goto decode_success
;
14791 /* 66 0F DD = PADDUSW */
14792 if (have66noF2noF3(pfx
) && sz
== 2) {
14793 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14794 "paddusw", Iop_QAdd16Ux8
, False
);
14795 goto decode_success
;
14800 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14801 /* 0F DE = PMAXUB -- 8x8 unsigned max */
14802 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14804 delta
= dis_MMXop_regmem_to_reg (
14805 vbi
, pfx
, delta
, opc
, "pmaxub", False
);
14806 goto decode_success
;
14808 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
14809 if (have66noF2noF3(pfx
) && sz
== 2) {
14810 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14811 "pmaxub", Iop_Max8Ux16
, False
);
14812 goto decode_success
;
14817 /* 66 0F DF = PANDN */
14818 if (have66noF2noF3(pfx
) && sz
== 2) {
14819 delta
= dis_SSE_E_to_G_all_invG( vbi
, pfx
, delta
, "pandn", Iop_AndV128
);
14820 goto decode_success
;
14825 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14826 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
14827 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14829 delta
= dis_MMXop_regmem_to_reg (
14830 vbi
, pfx
, delta
, opc
, "pavgb", False
);
14831 goto decode_success
;
14833 /* 66 0F E0 = PAVGB */
14834 if (have66noF2noF3(pfx
) && sz
== 2) {
14835 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14836 "pavgb", Iop_Avg8Ux16
, False
);
14837 goto decode_success
;
14842 /* 66 0F E1 = PSRAW by E */
14843 if (have66noF2noF3(pfx
) && sz
== 2) {
14844 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "psraw", Iop_SarN16x8
);
14845 goto decode_success
;
14850 /* 66 0F E2 = PSRAD by E */
14851 if (have66noF2noF3(pfx
) && sz
== 2) {
14852 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "psrad", Iop_SarN32x4
);
14853 goto decode_success
;
14858 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14859 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
14860 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14862 delta
= dis_MMXop_regmem_to_reg (
14863 vbi
, pfx
, delta
, opc
, "pavgw", False
);
14864 goto decode_success
;
14866 /* 66 0F E3 = PAVGW */
14867 if (have66noF2noF3(pfx
) && sz
== 2) {
14868 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14869 "pavgw", Iop_Avg16Ux8
, False
);
14870 goto decode_success
;
14875 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14876 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */
14877 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14879 delta
= dis_MMXop_regmem_to_reg (
14880 vbi
, pfx
, delta
, opc
, "pmuluh", False
);
14881 goto decode_success
;
14883 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
14884 if (have66noF2noF3(pfx
) && sz
== 2) {
14885 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14886 "pmulhuw", Iop_MulHi16Ux8
, False
);
14887 goto decode_success
;
14892 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
14893 if (have66noF2noF3(pfx
) && sz
== 2) {
14894 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14895 "pmulhw", Iop_MulHi16Sx8
, False
);
14896 goto decode_success
;
14901 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14902 lo half xmm(G), and zero upper half, rounding towards zero */
14903 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14904 lo half xmm(G), according to prevailing rounding mode, and zero
14906 if ( (haveF2no66noF3(pfx
) && sz
== 4)
14907 || (have66noF2noF3(pfx
) && sz
== 2) ) {
14908 delta
= dis_CVTxPD2DQ_128( vbi
, pfx
, delta
, False
/*!isAvx*/,
14909 toBool(sz
== 2)/*r2zero*/);
14910 goto decode_success
;
14912 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
14914 if (haveF3no66noF2(pfx
) && sz
== 4) {
14915 delta
= dis_CVTDQ2PD_128(vbi
, pfx
, delta
, False
/*!isAvx*/);
14916 goto decode_success
;
14921 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14922 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
14923 Intel manual does not say anything about the usual business of
14924 the FP reg tags getting trashed whenever an MMX insn happens.
14925 So we just leave them alone.
14927 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14928 modrm
= getUChar(delta
);
14929 if (!epartIsReg(modrm
)) {
14930 /* do_MMX_preamble(); Intel docs don't specify this */
14931 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14932 storeLE( mkexpr(addr
), getMMXReg(gregLO3ofRM(modrm
)) );
14933 DIP("movntq %s,%s\n", dis_buf
,
14934 nameMMXReg(gregLO3ofRM(modrm
)));
14936 goto decode_success
;
14938 /* else fall through */
14940 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
14941 if (have66noF2noF3(pfx
) && sz
== 2) {
14942 modrm
= getUChar(delta
);
14943 if (!epartIsReg(modrm
)) {
14944 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
14945 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
14946 storeLE( mkexpr(addr
), getXMMReg(gregOfRexRM(pfx
,modrm
)) );
14947 DIP("movntdq %s,%s\n", dis_buf
,
14948 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
14950 goto decode_success
;
14952 /* else fall through */
14957 /* 66 0F E8 = PSUBSB */
14958 if (have66noF2noF3(pfx
) && sz
== 2) {
14959 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14960 "psubsb", Iop_QSub8Sx16
, False
);
14961 goto decode_success
;
14966 /* 66 0F E9 = PSUBSW */
14967 if (have66noF2noF3(pfx
) && sz
== 2) {
14968 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14969 "psubsw", Iop_QSub16Sx8
, False
);
14970 goto decode_success
;
14975 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14976 /* 0F EA = PMINSW -- 16x4 signed min */
14977 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
14979 delta
= dis_MMXop_regmem_to_reg (
14980 vbi
, pfx
, delta
, opc
, "pminsw", False
);
14981 goto decode_success
;
14983 /* 66 0F EA = PMINSW -- 16x8 signed min */
14984 if (have66noF2noF3(pfx
) && sz
== 2) {
14985 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
14986 "pminsw", Iop_Min16Sx8
, False
);
14987 goto decode_success
;
14992 /* 66 0F EB = POR */
14993 if (have66noF2noF3(pfx
) && sz
== 2) {
14994 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "por", Iop_OrV128
);
14995 goto decode_success
;
15000 /* 66 0F EC = PADDSB */
15001 if (have66noF2noF3(pfx
) && sz
== 2) {
15002 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15003 "paddsb", Iop_QAdd8Sx16
, False
);
15004 goto decode_success
;
15009 /* 66 0F ED = PADDSW */
15010 if (have66noF2noF3(pfx
) && sz
== 2) {
15011 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15012 "paddsw", Iop_QAdd16Sx8
, False
);
15013 goto decode_success
;
15018 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15019 /* 0F EE = PMAXSW -- 16x4 signed max */
15020 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15022 delta
= dis_MMXop_regmem_to_reg (
15023 vbi
, pfx
, delta
, opc
, "pmaxsw", False
);
15024 goto decode_success
;
15026 /* 66 0F EE = PMAXSW -- 16x8 signed max */
15027 if (have66noF2noF3(pfx
) && sz
== 2) {
15028 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15029 "pmaxsw", Iop_Max16Sx8
, False
);
15030 goto decode_success
;
15035 /* 66 0F EF = PXOR */
15036 if (have66noF2noF3(pfx
) && sz
== 2) {
15037 delta
= dis_SSE_E_to_G_all( vbi
, pfx
, delta
, "pxor", Iop_XorV128
);
15038 goto decode_success
;
15043 /* 66 0F F1 = PSLLW by E */
15044 if (have66noF2noF3(pfx
) && sz
== 2) {
15045 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "psllw", Iop_ShlN16x8
);
15046 goto decode_success
;
15051 /* 66 0F F2 = PSLLD by E */
15052 if (have66noF2noF3(pfx
) && sz
== 2) {
15053 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "pslld", Iop_ShlN32x4
);
15054 goto decode_success
;
15059 /* 66 0F F3 = PSLLQ by E */
15060 if (have66noF2noF3(pfx
) && sz
== 2) {
15061 delta
= dis_SSE_shiftG_byE( vbi
, pfx
, delta
, "psllq", Iop_ShlN64x2
);
15062 goto decode_success
;
15067 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
15068 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
15070 if (have66noF2noF3(pfx
) && sz
== 2) {
15071 IRTemp sV
= newTemp(Ity_V128
);
15072 IRTemp dV
= newTemp(Ity_V128
);
15073 modrm
= getUChar(delta
);
15074 UInt rG
= gregOfRexRM(pfx
,modrm
);
15075 assign( dV
, getXMMReg(rG
) );
15076 if (epartIsReg(modrm
)) {
15077 UInt rE
= eregOfRexRM(pfx
,modrm
);
15078 assign( sV
, getXMMReg(rE
) );
15080 DIP("pmuludq %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
15082 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15083 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15085 DIP("pmuludq %s,%s\n", dis_buf
, nameXMMReg(rG
));
15087 putXMMReg( rG
, mkexpr(math_PMULUDQ_128( sV
, dV
)) );
15088 goto decode_success
;
15090 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
15091 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
15092 0 to form 64-bit result */
15093 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15094 IRTemp sV
= newTemp(Ity_I64
);
15095 IRTemp dV
= newTemp(Ity_I64
);
15096 t1
= newTemp(Ity_I32
);
15097 t0
= newTemp(Ity_I32
);
15098 modrm
= getUChar(delta
);
15101 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
15103 if (epartIsReg(modrm
)) {
15104 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
15106 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
15107 nameMMXReg(gregLO3ofRM(modrm
)));
15109 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15110 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
15112 DIP("pmuludq %s,%s\n", dis_buf
,
15113 nameMMXReg(gregLO3ofRM(modrm
)));
15116 assign( t0
, unop(Iop_64to32
, mkexpr(dV
)) );
15117 assign( t1
, unop(Iop_64to32
, mkexpr(sV
)) );
15118 putMMXReg( gregLO3ofRM(modrm
),
15119 binop( Iop_MullU32
, mkexpr(t0
), mkexpr(t1
) ) );
15120 goto decode_success
;
15125 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
15126 E(xmm or mem) to G(xmm) */
15127 if (have66noF2noF3(pfx
) && sz
== 2) {
15128 IRTemp sV
= newTemp(Ity_V128
);
15129 IRTemp dV
= newTemp(Ity_V128
);
15130 modrm
= getUChar(delta
);
15131 UInt rG
= gregOfRexRM(pfx
,modrm
);
15132 if (epartIsReg(modrm
)) {
15133 UInt rE
= eregOfRexRM(pfx
,modrm
);
15134 assign( sV
, getXMMReg(rE
) );
15136 DIP("pmaddwd %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
15138 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15139 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15141 DIP("pmaddwd %s,%s\n", dis_buf
, nameXMMReg(rG
));
15143 assign( dV
, getXMMReg(rG
) );
15144 putXMMReg( rG
, mkexpr(math_PMADDWD_128(dV
, sV
)) );
15145 goto decode_success
;
15150 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15151 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
15152 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15154 delta
= dis_MMXop_regmem_to_reg (
15155 vbi
, pfx
, delta
, opc
, "psadbw", False
);
15156 goto decode_success
;
15158 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
15159 from E(xmm or mem) to G(xmm) */
15160 if (have66noF2noF3(pfx
) && sz
== 2) {
15161 IRTemp sV
= newTemp(Ity_V128
);
15162 IRTemp dV
= newTemp(Ity_V128
);
15163 modrm
= getUChar(delta
);
15164 UInt rG
= gregOfRexRM(pfx
,modrm
);
15165 if (epartIsReg(modrm
)) {
15166 UInt rE
= eregOfRexRM(pfx
,modrm
);
15167 assign( sV
, getXMMReg(rE
) );
15169 DIP("psadbw %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
15171 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15172 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15174 DIP("psadbw %s,%s\n", dis_buf
, nameXMMReg(rG
));
15176 assign( dV
, getXMMReg(rG
) );
15177 putXMMReg( rG
, mkexpr( math_PSADBW_128 ( dV
, sV
) ) );
15179 goto decode_success
;
15184 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15185 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
15186 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15188 delta
= dis_MMX( &ok
, vbi
, pfx
, sz
, delta
-1 );
15189 if (ok
) goto decode_success
;
15191 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
15192 if (have66noF2noF3(pfx
) && sz
== 2 && epartIsReg(getUChar(delta
))) {
15193 delta
= dis_MASKMOVDQU( vbi
, pfx
, delta
, False
/*!isAvx*/ );
15194 goto decode_success
;
15199 /* 66 0F F8 = PSUBB */
15200 if (have66noF2noF3(pfx
) && sz
== 2) {
15201 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15202 "psubb", Iop_Sub8x16
, False
);
15203 goto decode_success
;
15208 /* 66 0F F9 = PSUBW */
15209 if (have66noF2noF3(pfx
) && sz
== 2) {
15210 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15211 "psubw", Iop_Sub16x8
, False
);
15212 goto decode_success
;
15217 /* 66 0F FA = PSUBD */
15218 if (have66noF2noF3(pfx
) && sz
== 2) {
15219 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15220 "psubd", Iop_Sub32x4
, False
);
15221 goto decode_success
;
15226 /* 66 0F FB = PSUBQ */
15227 if (have66noF2noF3(pfx
) && sz
== 2) {
15228 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15229 "psubq", Iop_Sub64x2
, False
);
15230 goto decode_success
;
15232 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
15233 /* 0F FB = PSUBQ -- sub 64x1 */
15234 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15236 delta
= dis_MMXop_regmem_to_reg (
15237 vbi
, pfx
, delta
, opc
, "psubq", False
);
15238 goto decode_success
;
15243 /* 66 0F FC = PADDB */
15244 if (have66noF2noF3(pfx
) && sz
== 2) {
15245 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15246 "paddb", Iop_Add8x16
, False
);
15247 goto decode_success
;
15252 /* 66 0F FD = PADDW */
15253 if (have66noF2noF3(pfx
) && sz
== 2) {
15254 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15255 "paddw", Iop_Add16x8
, False
);
15256 goto decode_success
;
15261 /* 66 0F FE = PADDD */
15262 if (have66noF2noF3(pfx
) && sz
== 2) {
15263 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
15264 "paddd", Iop_Add32x4
, False
);
15265 goto decode_success
;
15270 goto decode_failure
;
15275 *decode_OK
= False
;
15284 /*------------------------------------------------------------*/
15286 /*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3 ---*/
15288 /*------------------------------------------------------------*/
/* Decode MOVDDUP / VMOVDDUP (128-bit form): read a 64-bit lane from
   E (xmm reg low half, or a 64-bit memory load) and duplicate it into
   both 64-bit lanes of G.  For the AVX form (isAvx), the upper YMM
   lanes of G are zeroed via putYMMRegLoAndZU.
   NOTE(review): this span has lines dropped by extraction (the
   function's opening '{', the 'else' between the reg and mem arms,
   'delta' advancement, and the 'return delta;' / closing '}' are not
   visible).  Do not treat the text below as compilable as-is. */
15290 static Long
dis_MOVDDUP_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
15291 Long delta
, Bool isAvx
)
15293 IRTemp addr
= IRTemp_INVALID
;
15296 IRTemp sV
= newTemp(Ity_V128
);
15297 IRTemp d0
= newTemp(Ity_I64
);
15298 UChar modrm
= getUChar(delta
);
15299 UInt rG
= gregOfRexRM(pfx
,modrm
);
/* Register-source arm: take the low 64 bits of xmm rE. */
15300 if (epartIsReg(modrm
)) {
15301 UInt rE
= eregOfRexRM(pfx
,modrm
);
15302 assign( sV
, getXMMReg(rE
) );
15303 DIP("%smovddup %s,%s\n",
15304 isAvx
? "v" : "", nameXMMReg(rE
), nameXMMReg(rG
));
15306 assign ( d0
, unop(Iop_V128to64
, mkexpr(sV
)) );
/* Memory-source arm (else-branch marker dropped by extraction):
   load 64 bits directly from the effective address. */
15308 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15309 assign( d0
, loadLE(Ity_I64
, mkexpr(addr
)) );
15310 DIP("%smovddup %s,%s\n",
15311 isAvx
? "v" : "", dis_buf
, nameXMMReg(rG
));
/* Write d0:d0 to G; AVX form also zeroes the upper YMM half. */
15314 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
15315 ( rG
, binop(Iop_64HLtoV128
,mkexpr(d0
),mkexpr(d0
)) );
/* Decode VMOVDDUP (256-bit form): duplicate 64-bit lanes within each
   128-bit half of the source into G, i.e. result lanes are
   d0:d0:d1:d1 where d0 is source lane 0 and d1 is source lane 2.
   NOTE(review): extraction dropped lines here too (the 'Long delta'
   parameter tail, opening '{', else-branch marker, delta advancement
   and 'return delta;') — annotate-only, text is not compilable as-is. */
15320 static Long
dis_MOVDDUP_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
15323 IRTemp addr
= IRTemp_INVALID
;
15326 IRTemp d0
= newTemp(Ity_I64
);
15327 IRTemp d1
= newTemp(Ity_I64
);
15328 UChar modrm
= getUChar(delta
);
15329 UInt rG
= gregOfRexRM(pfx
,modrm
);
/* Register source: pick 64-bit lanes 0 and 2 of ymm rE. */
15330 if (epartIsReg(modrm
)) {
15331 UInt rE
= eregOfRexRM(pfx
,modrm
);
15332 DIP("vmovddup %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
15334 assign ( d0
, getYMMRegLane64(rE
, 0) );
15335 assign ( d1
, getYMMRegLane64(rE
, 2) );
/* Memory source: two 64-bit loads at addr and addr+16,
   matching lanes 0 and 2 of an in-memory 256-bit value. */
15337 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15338 assign( d0
, loadLE(Ity_I64
, mkexpr(addr
)) );
15339 assign( d1
, loadLE(Ity_I64
, binop(Iop_Add64
,
15340 mkexpr(addr
), mkU64(16))) );
15341 DIP("vmovddup %s,%s\n", dis_buf
, nameYMMReg(rG
));
/* Result layout: lanes 0,1 <- d0 ; lanes 2,3 <- d1. */
15344 putYMMRegLane64( rG
, 0, mkexpr(d0
) );
15345 putYMMRegLane64( rG
, 1, mkexpr(d0
) );
15346 putYMMRegLane64( rG
, 2, mkexpr(d1
) );
15347 putYMMRegLane64( rG
, 3, mkexpr(d1
) );
/* Decode MOVSLDUP / MOVSHDUP (and their AVX forms), 128-bit: duplicate
   the even (isL, "l") or odd (!isL, "h") 32-bit lanes of E into G —
   result is 2:2:0:0 for MOVSLDUP, 3:3:1:1 for MOVSHDUP.  AVX form
   zeroes the upper YMM half via putYMMRegLoAndZU.
   NOTE(review): lines dropped by extraction (opening '{', else-branch
   marker, delta advancement, 'return delta;').  Annotate-only. */
15352 static Long
dis_MOVSxDUP_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
15353 Long delta
, Bool isAvx
, Bool isL
)
15355 IRTemp addr
= IRTemp_INVALID
;
15358 IRTemp sV
= newTemp(Ity_V128
);
15359 UChar modrm
= getUChar(delta
);
15360 UInt rG
= gregOfRexRM(pfx
,modrm
);
15361 IRTemp s3
, s2
, s1
, s0
;
15362 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
15363 if (epartIsReg(modrm
)) {
15364 UInt rE
= eregOfRexRM(pfx
,modrm
);
15365 assign( sV
, getXMMReg(rE
) );
15366 DIP("%smovs%cdup %s,%s\n",
15367 isAvx
? "v" : "", isL
? 'l' : 'h', nameXMMReg(rE
), nameXMMReg(rG
));
/* Memory arm: note the explicit 16-alignment check before the
   128-bit load (SSE form requires aligned memory operand). */
15370 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15372 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
15373 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15374 DIP("%smovs%cdup %s,%s\n",
15375 isAvx
? "v" : "", isL
? 'l' : 'h', dis_buf
, nameXMMReg(rG
));
/* Split source into four 32-bit lanes, then rebuild with the
   selected lanes duplicated. */
15378 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
15379 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
15380 ( rG
, isL
? mkV128from32s( s2
, s2
, s0
, s0
)
15381 : mkV128from32s( s3
, s3
, s1
, s1
) );
/* Decode VMOVSLDUP / VMOVSHDUP, 256-bit: same even/odd 32-bit lane
   duplication as the 128-bit form, applied independently to each
   128-bit half of the YMM source.
   NOTE(review): extraction dropped the opening '{', else marker,
   delta advancement and 'return delta;'.  Annotate-only. */
15386 static Long
dis_MOVSxDUP_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
15387 Long delta
, Bool isL
)
15389 IRTemp addr
= IRTemp_INVALID
;
15392 IRTemp sV
= newTemp(Ity_V256
);
15393 UChar modrm
= getUChar(delta
);
15394 UInt rG
= gregOfRexRM(pfx
,modrm
);
15395 IRTemp s7
, s6
, s5
, s4
, s3
, s2
, s1
, s0
;
15396 s7
= s6
= s5
= s4
= s3
= s2
= s1
= s0
= IRTemp_INVALID
;
15397 if (epartIsReg(modrm
)) {
15398 UInt rE
= eregOfRexRM(pfx
,modrm
);
15399 assign( sV
, getYMMReg(rE
) );
15400 DIP("vmovs%cdup %s,%s\n",
15401 isL
? 'l' : 'h', nameYMMReg(rE
), nameYMMReg(rG
));
/* Memory arm: 256-bit load; no alignment trap for the AVX form. */
15404 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15405 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
15406 DIP("vmovs%cdup %s,%s\n",
15407 isL
? 'l' : 'h', dis_buf
, nameYMMReg(rG
));
/* Split into eight 32-bit lanes; rebuild each 128-bit half with the
   even (isL) or odd lanes duplicated. */
15410 breakupV256to32s( sV
, &s7
, &s6
, &s5
, &s4
, &s3
, &s2
, &s1
, &s0
);
15411 putYMMRegLane128( rG
, 1, isL
? mkV128from32s( s6
, s6
, s4
, s4
)
15412 : mkV128from32s( s7
, s7
, s5
, s5
) );
15413 putYMMRegLane128( rG
, 0, isL
? mkV128from32s( s2
, s2
, s0
, s0
)
15414 : mkV128from32s( s3
, s3
, s1
, s1
) );
/* Build IR for HADDPS / HSUBPS (128-bit horizontal 32Fx4 add/sub):
   splits both operands into 32-bit lanes, regroups even lanes into
   leftV and odd lanes into rightV, then does a single lane-wise
   Add32Fx4 / Sub32Fx4.  Rounding mode is faked (see XXXROUNDINGFIXME).
   NOTE(review): extraction dropped the opening '{' and the
   'return res;' / closing '}'.  Annotate-only. */
15419 static IRTemp
math_HADDPS_128 ( IRTemp dV
, IRTemp sV
, Bool isAdd
)
15421 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
15422 IRTemp leftV
= newTemp(Ity_V128
);
15423 IRTemp rightV
= newTemp(Ity_V128
);
15424 IRTemp rm
= newTemp(Ity_I32
);
15425 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
15427 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
15428 breakupV128to32s( dV
, &d3
, &d2
, &d1
, &d0
);
/* Pair the horizontal operands: evens on the left, odds on the
   right, so one vertical op computes all four horizontal results. */
15430 assign( leftV
, mkV128from32s( s2
, s0
, d2
, d0
) );
15431 assign( rightV
, mkV128from32s( s3
, s1
, d3
, d1
) );
15433 IRTemp res
= newTemp(Ity_V128
);
15434 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
15435 assign( res
, triop(isAdd
? Iop_Add32Fx4
: Iop_Sub32Fx4
,
15436 mkexpr(rm
), mkexpr(leftV
), mkexpr(rightV
) ) );
/* Build IR for HADDPD / HSUBPD (128-bit horizontal 64Fx2 add/sub):
   same strategy as math_HADDPS_128 but with two 64-bit lanes —
   leftV = s0:d0, rightV = s1:d1, then one Add64Fx2 / Sub64Fx2.
   Rounding mode is faked (see XXXROUNDINGFIXME).
   NOTE(review): extraction dropped the opening '{' and
   'return res;' / closing '}'.  Annotate-only. */
15441 static IRTemp
math_HADDPD_128 ( IRTemp dV
, IRTemp sV
, Bool isAdd
)
15443 IRTemp s1
, s0
, d1
, d0
;
15444 IRTemp leftV
= newTemp(Ity_V128
);
15445 IRTemp rightV
= newTemp(Ity_V128
);
15446 IRTemp rm
= newTemp(Ity_I32
);
15447 s1
= s0
= d1
= d0
= IRTemp_INVALID
;
15449 breakupV128to64s( sV
, &s1
, &s0
);
15450 breakupV128to64s( dV
, &d1
, &d0
);
15452 assign( leftV
, binop(Iop_64HLtoV128
, mkexpr(s0
), mkexpr(d0
)) );
15453 assign( rightV
, binop(Iop_64HLtoV128
, mkexpr(s1
), mkexpr(d1
)) );
15455 IRTemp res
= newTemp(Ity_V128
);
15456 assign( rm
, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
15457 assign( res
, triop(isAdd
? Iop_Add64Fx2
: Iop_Sub64Fx2
,
15458 mkexpr(rm
), mkexpr(leftV
), mkexpr(rightV
) ) );
15463 __attribute__((noinline
))
15465 Long
dis_ESC_0F__SSE3 ( Bool
* decode_OK
,
15466 const VexAbiInfo
* vbi
,
15467 Prefix pfx
, Int sz
, Long deltaIN
)
15469 IRTemp addr
= IRTemp_INVALID
;
15474 *decode_OK
= False
;
15476 Long delta
= deltaIN
;
15477 UChar opc
= getUChar(delta
);
15482 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
15483 duplicating some lanes (2:2:0:0). */
15484 if (haveF3no66noF2(pfx
) && sz
== 4) {
15485 delta
= dis_MOVSxDUP_128( vbi
, pfx
, delta
, False
/*!isAvx*/,
15487 goto decode_success
;
15489 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
15490 duplicating some lanes (0:1:0:1). */
15491 if (haveF2no66noF3(pfx
)
15492 && (sz
== 4 || /* ignore redundant REX.W */ sz
== 8)) {
15493 delta
= dis_MOVDDUP_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
15494 goto decode_success
;
15499 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
15500 duplicating some lanes (3:3:1:1). */
15501 if (haveF3no66noF2(pfx
) && sz
== 4) {
15502 delta
= dis_MOVSxDUP_128( vbi
, pfx
, delta
, False
/*!isAvx*/,
15504 goto decode_success
;
15510 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
15511 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
15512 if (haveF2no66noF3(pfx
) && sz
== 4) {
15513 IRTemp eV
= newTemp(Ity_V128
);
15514 IRTemp gV
= newTemp(Ity_V128
);
15515 Bool isAdd
= opc
== 0x7C;
15516 const HChar
* str
= isAdd
? "add" : "sub";
15517 modrm
= getUChar(delta
);
15518 UInt rG
= gregOfRexRM(pfx
,modrm
);
15519 if (epartIsReg(modrm
)) {
15520 UInt rE
= eregOfRexRM(pfx
,modrm
);
15521 assign( eV
, getXMMReg(rE
) );
15522 DIP("h%sps %s,%s\n", str
, nameXMMReg(rE
), nameXMMReg(rG
));
15525 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15526 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15527 DIP("h%sps %s,%s\n", str
, dis_buf
, nameXMMReg(rG
));
15531 assign( gV
, getXMMReg(rG
) );
15532 putXMMReg( rG
, mkexpr( math_HADDPS_128 ( gV
, eV
, isAdd
) ) );
15533 goto decode_success
;
15535 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
15536 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
15537 if (have66noF2noF3(pfx
) && sz
== 2) {
15538 IRTemp eV
= newTemp(Ity_V128
);
15539 IRTemp gV
= newTemp(Ity_V128
);
15540 Bool isAdd
= opc
== 0x7C;
15541 const HChar
* str
= isAdd
? "add" : "sub";
15542 modrm
= getUChar(delta
);
15543 UInt rG
= gregOfRexRM(pfx
,modrm
);
15544 if (epartIsReg(modrm
)) {
15545 UInt rE
= eregOfRexRM(pfx
,modrm
);
15546 assign( eV
, getXMMReg(rE
) );
15547 DIP("h%spd %s,%s\n", str
, nameXMMReg(rE
), nameXMMReg(rG
));
15550 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15551 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15552 DIP("h%spd %s,%s\n", str
, dis_buf
, nameXMMReg(rG
));
15556 assign( gV
, getXMMReg(rG
) );
15557 putXMMReg( rG
, mkexpr( math_HADDPD_128 ( gV
, eV
, isAdd
) ) );
15558 goto decode_success
;
15563 /* 66 0F D0 = ADDSUBPD -- 64x4 +/- from E (mem or xmm) to G (xmm). */
15564 if (have66noF2noF3(pfx
) && sz
== 2) {
15565 IRTemp eV
= newTemp(Ity_V128
);
15566 IRTemp gV
= newTemp(Ity_V128
);
15567 modrm
= getUChar(delta
);
15568 UInt rG
= gregOfRexRM(pfx
,modrm
);
15569 if (epartIsReg(modrm
)) {
15570 UInt rE
= eregOfRexRM(pfx
,modrm
);
15571 assign( eV
, getXMMReg(rE
) );
15572 DIP("addsubpd %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
15575 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15576 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15577 DIP("addsubpd %s,%s\n", dis_buf
, nameXMMReg(rG
));
15581 assign( gV
, getXMMReg(rG
) );
15582 putXMMReg( rG
, mkexpr( math_ADDSUBPD_128 ( gV
, eV
) ) );
15583 goto decode_success
;
15585 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
15586 if (haveF2no66noF3(pfx
) && sz
== 4) {
15587 IRTemp eV
= newTemp(Ity_V128
);
15588 IRTemp gV
= newTemp(Ity_V128
);
15589 modrm
= getUChar(delta
);
15590 UInt rG
= gregOfRexRM(pfx
,modrm
);
15592 modrm
= getUChar(delta
);
15593 if (epartIsReg(modrm
)) {
15594 UInt rE
= eregOfRexRM(pfx
,modrm
);
15595 assign( eV
, getXMMReg(rE
) );
15596 DIP("addsubps %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
15599 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15600 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15601 DIP("addsubps %s,%s\n", dis_buf
, nameXMMReg(rG
));
15605 assign( gV
, getXMMReg(rG
) );
15606 putXMMReg( rG
, mkexpr( math_ADDSUBPS_128 ( gV
, eV
) ) );
15607 goto decode_success
;
15612 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
15613 if (haveF2no66noF3(pfx
) && sz
== 4) {
15614 modrm
= getUChar(delta
);
15615 if (epartIsReg(modrm
)) {
15616 goto decode_failure
;
15618 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15619 putXMMReg( gregOfRexRM(pfx
,modrm
),
15620 loadLE(Ity_V128
, mkexpr(addr
)) );
15621 DIP("lddqu %s,%s\n", dis_buf
,
15622 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
15625 goto decode_success
;
15630 goto decode_failure
;
15635 *decode_OK
= False
;
15644 /*------------------------------------------------------------*/
15646 /*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3 ---*/
15648 /*------------------------------------------------------------*/
/* Build IR for SSSE3 PSHUFB (128-bit): permute the 16 bytes of dV
   using sV as per-byte selectors.  The 0x8F mask keeps, in each
   selector byte, the top bit (force-zero flag, per the PermOrZero op
   name) and the low 4 index bits, discarding bits [6:3].
   NOTE(review): extraction dropped lines here — the assign(res, ...)
   wrapper around the binop, the dV operand, and 'return res;' are not
   visible; the span ends mid-expression.  Annotate-only. */
15651 IRTemp
math_PSHUFB_XMM ( IRTemp dV
/*data to perm*/, IRTemp sV
/*perm*/ )
15653 IRTemp halfMask
= newTemp(Ity_I64
);
15654 assign(halfMask
, mkU64(0x8F8F8F8F8F8F8F8FULL
));
15655 IRExpr
* mask
= binop(Iop_64HLtoV128
, mkexpr(halfMask
), mkexpr(halfMask
));
15656 IRTemp res
= newTemp(Ity_V128
);
15658 binop(Iop_PermOrZero8x16
,
15660 // Mask off bits [6:3] of each source operand lane
15661 binop(Iop_AndV128
, mkexpr(sV
), mask
)
/* Build IR for AVX2 VPSHUFB (256-bit): split both operands into
   128-bit halves and apply math_PSHUFB_XMM to each half
   independently (byte shuffles never cross the 128-bit boundary),
   then reassemble with V128HLtoV256.
   NOTE(review): 'return res;' and closing '}' were dropped by
   extraction.  Annotate-only. */
15668 IRTemp
math_PSHUFB_YMM ( IRTemp dV
/*data to perm*/, IRTemp sV
/*perm*/ )
15670 IRTemp sHi
, sLo
, dHi
, dLo
;
15671 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
15672 breakupV256toV128s( dV
, &dHi
, &dLo
);
15673 breakupV256toV128s( sV
, &sHi
, &sLo
);
15674 IRTemp res
= newTemp(Ity_V256
);
15675 assign(res
, binop(Iop_V128HLtoV256
,
15676 mkexpr(math_PSHUFB_XMM(dHi
, sHi
)),
15677 mkexpr(math_PSHUFB_XMM(dLo
, sLo
))));
15682 static Long
dis_PHADD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
15683 Bool isAvx
, UChar opc
)
15685 IRTemp addr
= IRTemp_INVALID
;
15688 const HChar
* str
= "???";
15689 IROp opV64
= Iop_INVALID
;
15690 IROp opCatO
= Iop_CatOddLanes16x4
;
15691 IROp opCatE
= Iop_CatEvenLanes16x4
;
15692 IRTemp sV
= newTemp(Ity_V128
);
15693 IRTemp dV
= newTemp(Ity_V128
);
15694 IRTemp sHi
= newTemp(Ity_I64
);
15695 IRTemp sLo
= newTemp(Ity_I64
);
15696 IRTemp dHi
= newTemp(Ity_I64
);
15697 IRTemp dLo
= newTemp(Ity_I64
);
15698 UChar modrm
= getUChar(delta
);
15699 UInt rG
= gregOfRexRM(pfx
,modrm
);
15700 UInt rV
= isAvx
? getVexNvvvv(pfx
) : rG
;
15703 case 0x01: opV64
= Iop_Add16x4
; str
= "addw"; break;
15704 case 0x02: opV64
= Iop_Add32x2
; str
= "addd"; break;
15705 case 0x03: opV64
= Iop_QAdd16Sx4
; str
= "addsw"; break;
15706 case 0x05: opV64
= Iop_Sub16x4
; str
= "subw"; break;
15707 case 0x06: opV64
= Iop_Sub32x2
; str
= "subd"; break;
15708 case 0x07: opV64
= Iop_QSub16Sx4
; str
= "subsw"; break;
15709 default: vassert(0);
15711 if (opc
== 0x02 || opc
== 0x06) {
15712 opCatO
= Iop_InterleaveHI32x2
;
15713 opCatE
= Iop_InterleaveLO32x2
;
15716 assign( dV
, getXMMReg(rV
) );
15718 if (epartIsReg(modrm
)) {
15719 UInt rE
= eregOfRexRM(pfx
,modrm
);
15720 assign( sV
, getXMMReg(rE
) );
15721 DIP("%sph%s %s,%s\n", isAvx
? "v" : "", str
,
15722 nameXMMReg(rE
), nameXMMReg(rG
));
15725 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15727 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
15728 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15729 DIP("%sph%s %s,%s\n", isAvx
? "v" : "", str
,
15730 dis_buf
, nameXMMReg(rG
));
15734 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
15735 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
15736 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
15737 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
15739 /* This isn't a particularly efficient way to compute the
15740 result, but at least it avoids a proliferation of IROps,
15741 hence avoids complication all the backends. */
15743 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
15745 binop(Iop_64HLtoV128
,
15747 binop(opCatE
,mkexpr(sHi
),mkexpr(sLo
)),
15748 binop(opCatO
,mkexpr(sHi
),mkexpr(sLo
)) ),
15750 binop(opCatE
,mkexpr(dHi
),mkexpr(dLo
)),
15751 binop(opCatO
,mkexpr(dHi
),mkexpr(dLo
)) ) ) );
15756 static Long
dis_PHADD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
15759 IRTemp addr
= IRTemp_INVALID
;
15762 const HChar
* str
= "???";
15763 IROp opV64
= Iop_INVALID
;
15764 IROp opCatO
= Iop_CatOddLanes16x4
;
15765 IROp opCatE
= Iop_CatEvenLanes16x4
;
15766 IRTemp sV
= newTemp(Ity_V256
);
15767 IRTemp dV
= newTemp(Ity_V256
);
15768 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
15769 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
15770 UChar modrm
= getUChar(delta
);
15771 UInt rG
= gregOfRexRM(pfx
,modrm
);
15772 UInt rV
= getVexNvvvv(pfx
);
15775 case 0x01: opV64
= Iop_Add16x4
; str
= "addw"; break;
15776 case 0x02: opV64
= Iop_Add32x2
; str
= "addd"; break;
15777 case 0x03: opV64
= Iop_QAdd16Sx4
; str
= "addsw"; break;
15778 case 0x05: opV64
= Iop_Sub16x4
; str
= "subw"; break;
15779 case 0x06: opV64
= Iop_Sub32x2
; str
= "subd"; break;
15780 case 0x07: opV64
= Iop_QSub16Sx4
; str
= "subsw"; break;
15781 default: vassert(0);
15783 if (opc
== 0x02 || opc
== 0x06) {
15784 opCatO
= Iop_InterleaveHI32x2
;
15785 opCatE
= Iop_InterleaveLO32x2
;
15788 assign( dV
, getYMMReg(rV
) );
15790 if (epartIsReg(modrm
)) {
15791 UInt rE
= eregOfRexRM(pfx
,modrm
);
15792 assign( sV
, getYMMReg(rE
) );
15793 DIP("vph%s %s,%s\n", str
, nameYMMReg(rE
), nameYMMReg(rG
));
15796 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15797 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
15798 DIP("vph%s %s,%s\n", str
, dis_buf
, nameYMMReg(rG
));
15802 breakupV256to64s( dV
, &d3
, &d2
, &d1
, &d0
);
15803 breakupV256to64s( sV
, &s3
, &s2
, &s1
, &s0
);
15805 /* This isn't a particularly efficient way to compute the
15806 result, but at least it avoids a proliferation of IROps,
15807 hence avoids complication all the backends. */
15810 binop(Iop_V128HLtoV256
,
15811 binop(Iop_64HLtoV128
,
15813 binop(opCatE
,mkexpr(s3
),mkexpr(s2
)),
15814 binop(opCatO
,mkexpr(s3
),mkexpr(s2
)) ),
15816 binop(opCatE
,mkexpr(d3
),mkexpr(d2
)),
15817 binop(opCatO
,mkexpr(d3
),mkexpr(d2
)) ) ),
15818 binop(Iop_64HLtoV128
,
15820 binop(opCatE
,mkexpr(s1
),mkexpr(s0
)),
15821 binop(opCatO
,mkexpr(s1
),mkexpr(s0
)) ),
15823 binop(opCatE
,mkexpr(d1
),mkexpr(d0
)),
15824 binop(opCatO
,mkexpr(d1
),mkexpr(d0
)) ) ) ) );
/* Build IR for SSSE3 PMADDUBSW (128-bit): a single backend IROp,
   Iop_PwExtUSMulQAdd8x16, performs the pairwise
   unsigned(dV) x signed(sV) byte multiply with saturating 16-bit
   pairwise add.
   NOTE(review): 'return res;' and closing '}' were dropped by
   extraction.  Annotate-only. */
15829 static IRTemp
math_PMADDUBSW_128 ( IRTemp dV
, IRTemp sV
)
15831 IRTemp res
= newTemp(Ity_V128
);
15832 assign(res
, binop(Iop_PwExtUSMulQAdd8x16
, mkexpr(dV
), mkexpr(sV
)));
/* Build IR for AVX2 VPMADDUBSW (256-bit): split into 128-bit halves,
   apply math_PMADDUBSW_128 to each, reassemble with V128HLtoV256 —
   same halves-then-join pattern as math_PSHUFB_YMM.
   NOTE(review): 'return res;' and closing '}' were dropped by
   extraction.  Annotate-only. */
15838 IRTemp
math_PMADDUBSW_256 ( IRTemp dV
, IRTemp sV
)
15840 IRTemp sHi
, sLo
, dHi
, dLo
;
15841 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
15842 breakupV256toV128s( dV
, &dHi
, &dLo
);
15843 breakupV256toV128s( sV
, &sHi
, &sLo
);
15844 IRTemp res
= newTemp(Ity_V256
);
15845 assign(res
, binop(Iop_V128HLtoV256
,
15846 mkexpr(math_PMADDUBSW_128(dHi
, sHi
)),
15847 mkexpr(math_PMADDUBSW_128(dLo
, sLo
))));
15852 __attribute__((noinline
))
15854 Long
dis_ESC_0F38__SupSSE3 ( Bool
* decode_OK
,
15855 const VexAbiInfo
* vbi
,
15856 Prefix pfx
, Int sz
, Long deltaIN
)
15858 IRTemp addr
= IRTemp_INVALID
;
15863 *decode_OK
= False
;
15865 Long delta
= deltaIN
;
15866 UChar opc
= getUChar(delta
);
15871 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
15872 if (have66noF2noF3(pfx
)
15873 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
15874 IRTemp sV
= newTemp(Ity_V128
);
15875 IRTemp dV
= newTemp(Ity_V128
);
15877 modrm
= getUChar(delta
);
15878 assign( dV
, getXMMReg(gregOfRexRM(pfx
,modrm
)) );
15880 if (epartIsReg(modrm
)) {
15881 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
15883 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
15884 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
15886 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15887 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
15888 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
15890 DIP("pshufb %s,%s\n", dis_buf
,
15891 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
15894 IRTemp res
= math_PSHUFB_XMM( dV
, sV
);
15895 putXMMReg(gregOfRexRM(pfx
,modrm
), mkexpr(res
));
15896 goto decode_success
;
15898 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
15899 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15900 IRTemp sV
= newTemp(Ity_I64
);
15901 IRTemp dV
= newTemp(Ity_I64
);
15903 modrm
= getUChar(delta
);
15905 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
15907 if (epartIsReg(modrm
)) {
15908 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
15910 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
15911 nameMMXReg(gregLO3ofRM(modrm
)));
15913 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
15914 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
15916 DIP("pshufb %s,%s\n", dis_buf
,
15917 nameMMXReg(gregLO3ofRM(modrm
)));
15921 gregLO3ofRM(modrm
),
15925 // Mask off bits [6:3] of each source operand lane
15926 binop(Iop_And64
, mkexpr(sV
), mkU64(0x8787878787878787ULL
))
15929 goto decode_success
;
15939 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
15941 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
15943 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
15944 xmm) and G to G (xmm). */
15945 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
15947 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
15949 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
15950 xmm) and G to G (xmm). */
15951 if (have66noF2noF3(pfx
)
15952 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
15953 delta
= dis_PHADD_128( vbi
, pfx
, delta
, False
/*isAvx*/, opc
);
15954 goto decode_success
;
15956 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
15957 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
15959 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
15961 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
15962 mmx) and G to G (mmx). */
15963 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
15965 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
15967 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
15968 mmx) and G to G (mmx). */
15969 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
15970 const HChar
* str
= "???";
15971 IROp opV64
= Iop_INVALID
;
15972 IROp opCatO
= Iop_CatOddLanes16x4
;
15973 IROp opCatE
= Iop_CatEvenLanes16x4
;
15974 IRTemp sV
= newTemp(Ity_I64
);
15975 IRTemp dV
= newTemp(Ity_I64
);
15977 modrm
= getUChar(delta
);
15980 case 0x01: opV64
= Iop_Add16x4
; str
= "addw"; break;
15981 case 0x02: opV64
= Iop_Add32x2
; str
= "addd"; break;
15982 case 0x03: opV64
= Iop_QAdd16Sx4
; str
= "addsw"; break;
15983 case 0x05: opV64
= Iop_Sub16x4
; str
= "subw"; break;
15984 case 0x06: opV64
= Iop_Sub32x2
; str
= "subd"; break;
15985 case 0x07: opV64
= Iop_QSub16Sx4
; str
= "subsw"; break;
15986 default: vassert(0);
15988 if (opc
== 0x02 || opc
== 0x06) {
15989 opCatO
= Iop_InterleaveHI32x2
;
15990 opCatE
= Iop_InterleaveLO32x2
;
15994 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
15996 if (epartIsReg(modrm
)) {
15997 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
15999 DIP("ph%s %s,%s\n", str
, nameMMXReg(eregLO3ofRM(modrm
)),
16000 nameMMXReg(gregLO3ofRM(modrm
)));
16002 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16003 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16005 DIP("ph%s %s,%s\n", str
, dis_buf
,
16006 nameMMXReg(gregLO3ofRM(modrm
)));
16010 gregLO3ofRM(modrm
),
16012 binop(opCatE
,mkexpr(sV
),mkexpr(dV
)),
16013 binop(opCatO
,mkexpr(sV
),mkexpr(dV
))
16016 goto decode_success
;
16021 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
16022 Unsigned Bytes (XMM) */
16023 if (have66noF2noF3(pfx
)
16024 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
16025 IRTemp sV
= newTemp(Ity_V128
);
16026 IRTemp dV
= newTemp(Ity_V128
);
16027 modrm
= getUChar(delta
);
16028 UInt rG
= gregOfRexRM(pfx
,modrm
);
16030 assign( dV
, getXMMReg(rG
) );
16032 if (epartIsReg(modrm
)) {
16033 UInt rE
= eregOfRexRM(pfx
,modrm
);
16034 assign( sV
, getXMMReg(rE
) );
16036 DIP("pmaddubsw %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
16038 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16039 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
16040 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
16042 DIP("pmaddubsw %s,%s\n", dis_buf
, nameXMMReg(rG
));
16045 putXMMReg( rG
, mkexpr( math_PMADDUBSW_128( dV
, sV
) ) );
16046 goto decode_success
;
16048 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
16049 Unsigned Bytes (MMX) */
16050 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
16051 IRTemp sV
= newTemp(Ity_I64
);
16052 IRTemp dV
= newTemp(Ity_I64
);
16053 IRTemp sVoddsSX
= newTemp(Ity_I64
);
16054 IRTemp sVevensSX
= newTemp(Ity_I64
);
16055 IRTemp dVoddsZX
= newTemp(Ity_I64
);
16056 IRTemp dVevensZX
= newTemp(Ity_I64
);
16058 modrm
= getUChar(delta
);
16060 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
16062 if (epartIsReg(modrm
)) {
16063 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
16065 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
16066 nameMMXReg(gregLO3ofRM(modrm
)));
16068 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16069 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16071 DIP("pmaddubsw %s,%s\n", dis_buf
,
16072 nameMMXReg(gregLO3ofRM(modrm
)));
16075 /* compute dV unsigned x sV signed */
16077 binop(Iop_SarN16x4
, mkexpr(sV
), mkU8(8)) );
16079 binop(Iop_SarN16x4
,
16080 binop(Iop_ShlN16x4
, mkexpr(sV
), mkU8(8)),
16083 binop(Iop_ShrN16x4
, mkexpr(dV
), mkU8(8)) );
16085 binop(Iop_ShrN16x4
,
16086 binop(Iop_ShlN16x4
, mkexpr(dV
), mkU8(8)),
16090 gregLO3ofRM(modrm
),
16091 binop(Iop_QAdd16Sx4
,
16092 binop(Iop_Mul16x4
, mkexpr(sVoddsSX
), mkexpr(dVoddsZX
)),
16093 binop(Iop_Mul16x4
, mkexpr(sVevensSX
), mkexpr(dVevensZX
))
16096 goto decode_success
;
16103 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
16104 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
16105 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
16106 if (have66noF2noF3(pfx
)
16107 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
16108 IRTemp sV
= newTemp(Ity_V128
);
16109 IRTemp dV
= newTemp(Ity_V128
);
16110 IRTemp sHi
= newTemp(Ity_I64
);
16111 IRTemp sLo
= newTemp(Ity_I64
);
16112 IRTemp dHi
= newTemp(Ity_I64
);
16113 IRTemp dLo
= newTemp(Ity_I64
);
16114 const HChar
* str
= "???";
16118 case 0x08: laneszB
= 1; str
= "b"; break;
16119 case 0x09: laneszB
= 2; str
= "w"; break;
16120 case 0x0A: laneszB
= 4; str
= "d"; break;
16121 default: vassert(0);
16124 modrm
= getUChar(delta
);
16125 assign( dV
, getXMMReg(gregOfRexRM(pfx
,modrm
)) );
16127 if (epartIsReg(modrm
)) {
16128 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
16130 DIP("psign%s %s,%s\n", str
, nameXMMReg(eregOfRexRM(pfx
,modrm
)),
16131 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16133 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16134 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
16135 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
16137 DIP("psign%s %s,%s\n", str
, dis_buf
,
16138 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16141 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
16142 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
16143 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
16144 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
16147 gregOfRexRM(pfx
,modrm
),
16148 binop(Iop_64HLtoV128
,
16149 dis_PSIGN_helper( mkexpr(sHi
), mkexpr(dHi
), laneszB
),
16150 dis_PSIGN_helper( mkexpr(sLo
), mkexpr(dLo
), laneszB
)
16153 goto decode_success
;
16155 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
16156 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
16157 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
16158 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
16159 IRTemp sV
= newTemp(Ity_I64
);
16160 IRTemp dV
= newTemp(Ity_I64
);
16161 const HChar
* str
= "???";
16165 case 0x08: laneszB
= 1; str
= "b"; break;
16166 case 0x09: laneszB
= 2; str
= "w"; break;
16167 case 0x0A: laneszB
= 4; str
= "d"; break;
16168 default: vassert(0);
16171 modrm
= getUChar(delta
);
16173 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
16175 if (epartIsReg(modrm
)) {
16176 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
16178 DIP("psign%s %s,%s\n", str
, nameMMXReg(eregLO3ofRM(modrm
)),
16179 nameMMXReg(gregLO3ofRM(modrm
)));
16181 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16182 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16184 DIP("psign%s %s,%s\n", str
, dis_buf
,
16185 nameMMXReg(gregLO3ofRM(modrm
)));
16189 gregLO3ofRM(modrm
),
16190 dis_PSIGN_helper( mkexpr(sV
), mkexpr(dV
), laneszB
)
16192 goto decode_success
;
16197 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
16199 if (have66noF2noF3(pfx
)
16200 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
16201 IRTemp sV
= newTemp(Ity_V128
);
16202 IRTemp dV
= newTemp(Ity_V128
);
16203 IRTemp sHi
= newTemp(Ity_I64
);
16204 IRTemp sLo
= newTemp(Ity_I64
);
16205 IRTemp dHi
= newTemp(Ity_I64
);
16206 IRTemp dLo
= newTemp(Ity_I64
);
16208 modrm
= getUChar(delta
);
16209 assign( dV
, getXMMReg(gregOfRexRM(pfx
,modrm
)) );
16211 if (epartIsReg(modrm
)) {
16212 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
16214 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx
,modrm
)),
16215 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16217 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16218 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
16219 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
16221 DIP("pmulhrsw %s,%s\n", dis_buf
,
16222 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16225 assign( dHi
, unop(Iop_V128HIto64
, mkexpr(dV
)) );
16226 assign( dLo
, unop(Iop_V128to64
, mkexpr(dV
)) );
16227 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
16228 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
16231 gregOfRexRM(pfx
,modrm
),
16232 binop(Iop_64HLtoV128
,
16233 dis_PMULHRSW_helper( mkexpr(sHi
), mkexpr(dHi
) ),
16234 dis_PMULHRSW_helper( mkexpr(sLo
), mkexpr(dLo
) )
16237 goto decode_success
;
16239 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
16241 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
16242 IRTemp sV
= newTemp(Ity_I64
);
16243 IRTemp dV
= newTemp(Ity_I64
);
16245 modrm
= getUChar(delta
);
16247 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
16249 if (epartIsReg(modrm
)) {
16250 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
16252 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm
)),
16253 nameMMXReg(gregLO3ofRM(modrm
)));
16255 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16256 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16258 DIP("pmulhrsw %s,%s\n", dis_buf
,
16259 nameMMXReg(gregLO3ofRM(modrm
)));
16263 gregLO3ofRM(modrm
),
16264 dis_PMULHRSW_helper( mkexpr(sV
), mkexpr(dV
) )
16266 goto decode_success
;
16273 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
16274 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
16275 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
16276 if (have66noF2noF3(pfx
)
16277 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
16278 IRTemp sV
= newTemp(Ity_V128
);
16279 const HChar
* str
= "???";
16283 case 0x1C: laneszB
= 1; str
= "b"; break;
16284 case 0x1D: laneszB
= 2; str
= "w"; break;
16285 case 0x1E: laneszB
= 4; str
= "d"; break;
16286 default: vassert(0);
16289 modrm
= getUChar(delta
);
16290 if (epartIsReg(modrm
)) {
16291 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
16293 DIP("pabs%s %s,%s\n", str
, nameXMMReg(eregOfRexRM(pfx
,modrm
)),
16294 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16296 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16297 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
16298 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
16300 DIP("pabs%s %s,%s\n", str
, dis_buf
,
16301 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16304 putXMMReg( gregOfRexRM(pfx
,modrm
),
16305 mkexpr(math_PABS_XMM(sV
, laneszB
)) );
16306 goto decode_success
;
16308 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
16309 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
16310 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
16311 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
16312 IRTemp sV
= newTemp(Ity_I64
);
16313 const HChar
* str
= "???";
16317 case 0x1C: laneszB
= 1; str
= "b"; break;
16318 case 0x1D: laneszB
= 2; str
= "w"; break;
16319 case 0x1E: laneszB
= 4; str
= "d"; break;
16320 default: vassert(0);
16323 modrm
= getUChar(delta
);
16326 if (epartIsReg(modrm
)) {
16327 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
16329 DIP("pabs%s %s,%s\n", str
, nameMMXReg(eregLO3ofRM(modrm
)),
16330 nameMMXReg(gregLO3ofRM(modrm
)));
16332 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16333 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16335 DIP("pabs%s %s,%s\n", str
, dis_buf
,
16336 nameMMXReg(gregLO3ofRM(modrm
)));
16339 putMMXReg( gregLO3ofRM(modrm
),
16340 mkexpr(math_PABS_MMX( sV
, laneszB
)) );
16341 goto decode_success
;
16351 *decode_OK
= False
;
16360 /*------------------------------------------------------------*/
16362 /*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3 ---*/
16364 /*------------------------------------------------------------*/
16366 __attribute__((noinline
))
16368 Long
dis_ESC_0F3A__SupSSE3 ( Bool
* decode_OK
,
16369 const VexAbiInfo
* vbi
,
16370 Prefix pfx
, Int sz
, Long deltaIN
)
16373 IRTemp addr
= IRTemp_INVALID
;
16378 *decode_OK
= False
;
16380 Long delta
= deltaIN
;
16381 UChar opc
= getUChar(delta
);
16386 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
16387 if (have66noF2noF3(pfx
)
16388 && (sz
== 2 || /*redundant REX.W*/ sz
== 8)) {
16389 IRTemp sV
= newTemp(Ity_V128
);
16390 IRTemp dV
= newTemp(Ity_V128
);
16392 modrm
= getUChar(delta
);
16393 assign( dV
, getXMMReg(gregOfRexRM(pfx
,modrm
)) );
16395 if (epartIsReg(modrm
)) {
16396 assign( sV
, getXMMReg(eregOfRexRM(pfx
,modrm
)) );
16397 d64
= (Long
)getUChar(delta
+1);
16399 DIP("palignr $%lld,%s,%s\n", d64
,
16400 nameXMMReg(eregOfRexRM(pfx
,modrm
)),
16401 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16403 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
16404 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
16405 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
16406 d64
= (Long
)getUChar(delta
+alen
);
16408 DIP("palignr $%lld,%s,%s\n", d64
,
16410 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
16413 IRTemp res
= math_PALIGNR_XMM( sV
, dV
, d64
);
16414 putXMMReg( gregOfRexRM(pfx
,modrm
), mkexpr(res
) );
16415 goto decode_success
;
16417 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
16418 if (haveNo66noF2noF3(pfx
) && sz
== 4) {
16419 IRTemp sV
= newTemp(Ity_I64
);
16420 IRTemp dV
= newTemp(Ity_I64
);
16421 IRTemp res
= newTemp(Ity_I64
);
16423 modrm
= getUChar(delta
);
16425 assign( dV
, getMMXReg(gregLO3ofRM(modrm
)) );
16427 if (epartIsReg(modrm
)) {
16428 assign( sV
, getMMXReg(eregLO3ofRM(modrm
)) );
16429 d64
= (Long
)getUChar(delta
+1);
16431 DIP("palignr $%lld,%s,%s\n", d64
,
16432 nameMMXReg(eregLO3ofRM(modrm
)),
16433 nameMMXReg(gregLO3ofRM(modrm
)));
16435 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
16436 assign( sV
, loadLE(Ity_I64
, mkexpr(addr
)) );
16437 d64
= (Long
)getUChar(delta
+alen
);
16439 DIP("palignr $%lld%s,%s\n", d64
,
16441 nameMMXReg(gregLO3ofRM(modrm
)));
16445 assign( res
, mkexpr(sV
) );
16447 else if (d64
>= 1 && d64
<= 7) {
16450 binop(Iop_Shr64
, mkexpr(sV
), mkU8(8*d64
)),
16451 binop(Iop_Shl64
, mkexpr(dV
), mkU8(8*(8-d64
))
16454 else if (d64
== 8) {
16455 assign( res
, mkexpr(dV
) );
16457 else if (d64
>= 9 && d64
<= 15) {
16458 assign( res
, binop(Iop_Shr64
, mkexpr(dV
), mkU8(8*(d64
-8))) );
16460 else if (d64
>= 16 && d64
<= 255) {
16461 assign( res
, mkU64(0) );
16466 putMMXReg( gregLO3ofRM(modrm
), mkexpr(res
) );
16467 goto decode_success
;
16477 *decode_OK
= False
;
16486 /*------------------------------------------------------------*/
16488 /*--- Top-level SSE4: dis_ESC_0F__SSE4 ---*/
16490 /*------------------------------------------------------------*/
16492 __attribute__((noinline
))
16494 Long
dis_ESC_0F__SSE4 ( Bool
* decode_OK
,
16495 const VexArchInfo
* archinfo
,
16496 const VexAbiInfo
* vbi
,
16497 Prefix pfx
, Int sz
, Long deltaIN
)
16499 IRTemp addr
= IRTemp_INVALID
;
16500 IRType ty
= Ity_INVALID
;
16505 *decode_OK
= False
;
16507 Long delta
= deltaIN
;
16508 UChar opc
= getUChar(delta
);
16513 /* F3 0F B8 = POPCNT{W,L,Q}
16514 Count the number of 1 bits in a register
16516 if (haveF3noF2(pfx
) /* so both 66 and REX.W are possibilities */
16517 && (sz
== 2 || sz
== 4 || sz
== 8)) {
16518 /*IRType*/ ty
= szToITy(sz
);
16519 IRTemp src
= newTemp(ty
);
16520 modrm
= getUChar(delta
);
16521 if (epartIsReg(modrm
)) {
16522 assign(src
, getIRegE(sz
, pfx
, modrm
));
16524 DIP("popcnt%c %s, %s\n", nameISize(sz
), nameIRegE(sz
, pfx
, modrm
),
16525 nameIRegG(sz
, pfx
, modrm
));
16527 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0);
16528 assign(src
, loadLE(ty
, mkexpr(addr
)));
16530 DIP("popcnt%c %s, %s\n", nameISize(sz
), dis_buf
,
16531 nameIRegG(sz
, pfx
, modrm
));
16534 IRTemp result
= gen_POPCOUNT(ty
, src
);
16535 putIRegG(sz
, pfx
, modrm
, mkexpr(result
));
16537 // Update flags. This is pretty lame .. perhaps can do better
16538 // if this turns out to be performance critical.
16539 // O S A C P are cleared. Z is set if SRC == 0.
16540 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
16541 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
16542 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
16543 stmt( IRStmt_Put( OFFB_CC_DEP1
,
16547 widenUto64(mkexpr(src
)),
16549 mkU8(AMD64G_CC_SHIFT_Z
))));
16551 goto decode_success
;
16556 /* F3 0F BC -- TZCNT (count trailing zeroes. A BMI extension,
16557 which we can only decode if we're sure this is a BMI1 capable cpu
16558 that supports TZCNT, since otherwise it's BSF, which behaves
16559 differently on zero source. */
16560 if (haveF3noF2(pfx
) /* so both 66 and 48 are possibilities */
16561 && (sz
== 2 || sz
== 4 || sz
== 8)
16562 && 0 != (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_BMI
)) {
16563 /*IRType*/ ty
= szToITy(sz
);
16564 IRTemp src
= newTemp(ty
);
16565 modrm
= getUChar(delta
);
16566 if (epartIsReg(modrm
)) {
16567 assign(src
, getIRegE(sz
, pfx
, modrm
));
16569 DIP("tzcnt%c %s, %s\n", nameISize(sz
), nameIRegE(sz
, pfx
, modrm
),
16570 nameIRegG(sz
, pfx
, modrm
));
16572 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0);
16573 assign(src
, loadLE(ty
, mkexpr(addr
)));
16575 DIP("tzcnt%c %s, %s\n", nameISize(sz
), dis_buf
,
16576 nameIRegG(sz
, pfx
, modrm
));
16579 IRTemp res
= gen_TZCNT(ty
, src
);
16580 putIRegG(sz
, pfx
, modrm
, mkexpr(res
));
16582 // Update flags. This is pretty lame .. perhaps can do better
16583 // if this turns out to be performance critical.
16584 // O S A P are cleared. Z is set if RESULT == 0.
16585 // C is set if SRC is zero.
16586 IRTemp src64
= newTemp(Ity_I64
);
16587 IRTemp res64
= newTemp(Ity_I64
);
16588 assign(src64
, widenUto64(mkexpr(src
)));
16589 assign(res64
, widenUto64(mkexpr(res
)));
16591 IRTemp oszacp
= newTemp(Ity_I64
);
16597 binop(Iop_CmpEQ64
, mkexpr(res64
), mkU64(0))),
16598 mkU8(AMD64G_CC_SHIFT_Z
)),
16601 binop(Iop_CmpEQ64
, mkexpr(src64
), mkU64(0))),
16602 mkU8(AMD64G_CC_SHIFT_C
))
16606 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
16607 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
16608 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
16609 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(oszacp
) ));
16611 goto decode_success
;
16616 /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension,
16617 which we can only decode if we're sure this is an AMD cpu
16618 that supports LZCNT, since otherwise it's BSR, which behaves
16619 differently. Bizarrely, my Sandy Bridge also accepts these
16620 instructions but produces different results. */
16621 if (haveF3noF2(pfx
) /* so both 66 and 48 are possibilities */
16622 && (sz
== 2 || sz
== 4 || sz
== 8)
16623 && 0 != (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_LZCNT
)) {
16624 /*IRType*/ ty
= szToITy(sz
);
16625 IRTemp src
= newTemp(ty
);
16626 modrm
= getUChar(delta
);
16627 if (epartIsReg(modrm
)) {
16628 assign(src
, getIRegE(sz
, pfx
, modrm
));
16630 DIP("lzcnt%c %s, %s\n", nameISize(sz
), nameIRegE(sz
, pfx
, modrm
),
16631 nameIRegG(sz
, pfx
, modrm
));
16633 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0);
16634 assign(src
, loadLE(ty
, mkexpr(addr
)));
16636 DIP("lzcnt%c %s, %s\n", nameISize(sz
), dis_buf
,
16637 nameIRegG(sz
, pfx
, modrm
));
16640 IRTemp res
= gen_LZCNT(ty
, src
);
16641 putIRegG(sz
, pfx
, modrm
, mkexpr(res
));
16643 // Update flags. This is pretty lame .. perhaps can do better
16644 // if this turns out to be performance critical.
16645 // O S A P are cleared. Z is set if RESULT == 0.
16646 // C is set if SRC is zero.
16647 IRTemp src64
= newTemp(Ity_I64
);
16648 IRTemp res64
= newTemp(Ity_I64
);
16649 assign(src64
, widenUto64(mkexpr(src
)));
16650 assign(res64
, widenUto64(mkexpr(res
)));
16652 IRTemp oszacp
= newTemp(Ity_I64
);
16658 binop(Iop_CmpEQ64
, mkexpr(res64
), mkU64(0))),
16659 mkU8(AMD64G_CC_SHIFT_Z
)),
16662 binop(Iop_CmpEQ64
, mkexpr(src64
), mkU64(0))),
16663 mkU8(AMD64G_CC_SHIFT_C
))
16667 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
16668 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
16669 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
16670 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(oszacp
) ));
16672 goto decode_success
;
16682 *decode_OK
= False
;
16691 /*------------------------------------------------------------*/
16693 /*--- Top-level SSE4: dis_ESC_0F38__SSE4 ---*/
16695 /*------------------------------------------------------------*/
16697 static IRTemp
math_PBLENDVB_128 ( IRTemp vecE
, IRTemp vecG
,
16698 IRTemp vec0
/*controlling mask*/,
16699 UInt gran
, IROp opSAR
)
16701 /* The tricky bit is to convert vec0 into a suitable mask, by
16702 copying the most significant bit of each lane into all positions
16704 IRTemp sh
= newTemp(Ity_I8
);
16705 assign(sh
, mkU8(8 * gran
- 1));
16707 IRTemp mask
= newTemp(Ity_V128
);
16708 assign(mask
, binop(opSAR
, mkexpr(vec0
), mkexpr(sh
)));
16710 IRTemp notmask
= newTemp(Ity_V128
);
16711 assign(notmask
, unop(Iop_NotV128
, mkexpr(mask
)));
16713 IRTemp res
= newTemp(Ity_V128
);
16714 assign(res
, binop(Iop_OrV128
,
16715 binop(Iop_AndV128
, mkexpr(vecE
), mkexpr(mask
)),
16716 binop(Iop_AndV128
, mkexpr(vecG
), mkexpr(notmask
))));
16720 static IRTemp
math_PBLENDVB_256 ( IRTemp vecE
, IRTemp vecG
,
16721 IRTemp vec0
/*controlling mask*/,
16722 UInt gran
, IROp opSAR128
)
16724 /* The tricky bit is to convert vec0 into a suitable mask, by
16725 copying the most significant bit of each lane into all positions
16727 IRTemp sh
= newTemp(Ity_I8
);
16728 assign(sh
, mkU8(8 * gran
- 1));
16730 IRTemp vec0Hi
= IRTemp_INVALID
;
16731 IRTemp vec0Lo
= IRTemp_INVALID
;
16732 breakupV256toV128s( vec0
, &vec0Hi
, &vec0Lo
);
16734 IRTemp mask
= newTemp(Ity_V256
);
16735 assign(mask
, binop(Iop_V128HLtoV256
,
16736 binop(opSAR128
, mkexpr(vec0Hi
), mkexpr(sh
)),
16737 binop(opSAR128
, mkexpr(vec0Lo
), mkexpr(sh
))));
16739 IRTemp notmask
= newTemp(Ity_V256
);
16740 assign(notmask
, unop(Iop_NotV256
, mkexpr(mask
)));
16742 IRTemp res
= newTemp(Ity_V256
);
16743 assign(res
, binop(Iop_OrV256
,
16744 binop(Iop_AndV256
, mkexpr(vecE
), mkexpr(mask
)),
16745 binop(Iop_AndV256
, mkexpr(vecG
), mkexpr(notmask
))));
16749 static Long
dis_VBLENDV_128 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
16750 const HChar
*name
, UInt gran
, IROp opSAR
)
16752 IRTemp addr
= IRTemp_INVALID
;
16755 UChar modrm
= getUChar(delta
);
16756 UInt rG
= gregOfRexRM(pfx
, modrm
);
16757 UInt rV
= getVexNvvvv(pfx
);
16758 UInt rIS4
= 0xFF; /* invalid */
16759 IRTemp vecE
= newTemp(Ity_V128
);
16760 IRTemp vecV
= newTemp(Ity_V128
);
16761 IRTemp vecIS4
= newTemp(Ity_V128
);
16762 if (epartIsReg(modrm
)) {
16764 UInt rE
= eregOfRexRM(pfx
, modrm
);
16765 assign(vecE
, getXMMReg(rE
));
16766 UChar ib
= getUChar(delta
);
16767 rIS4
= (ib
>> 4) & 0xF;
16768 DIP("%s %s,%s,%s,%s\n",
16769 name
, nameXMMReg(rIS4
), nameXMMReg(rE
),
16770 nameXMMReg(rV
), nameXMMReg(rG
));
16772 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
16774 assign(vecE
, loadLE(Ity_V128
, mkexpr(addr
)));
16775 UChar ib
= getUChar(delta
);
16776 rIS4
= (ib
>> 4) & 0xF;
16777 DIP("%s %s,%s,%s,%s\n",
16778 name
, nameXMMReg(rIS4
), dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
16781 assign(vecV
, getXMMReg(rV
));
16782 assign(vecIS4
, getXMMReg(rIS4
));
16783 IRTemp res
= math_PBLENDVB_128( vecE
, vecV
, vecIS4
, gran
, opSAR
);
16784 putYMMRegLoAndZU( rG
, mkexpr(res
) );
16788 static Long
dis_VBLENDV_256 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
16789 const HChar
*name
, UInt gran
, IROp opSAR128
)
16791 IRTemp addr
= IRTemp_INVALID
;
16794 UChar modrm
= getUChar(delta
);
16795 UInt rG
= gregOfRexRM(pfx
, modrm
);
16796 UInt rV
= getVexNvvvv(pfx
);
16797 UInt rIS4
= 0xFF; /* invalid */
16798 IRTemp vecE
= newTemp(Ity_V256
);
16799 IRTemp vecV
= newTemp(Ity_V256
);
16800 IRTemp vecIS4
= newTemp(Ity_V256
);
16801 if (epartIsReg(modrm
)) {
16803 UInt rE
= eregOfRexRM(pfx
, modrm
);
16804 assign(vecE
, getYMMReg(rE
));
16805 UChar ib
= getUChar(delta
);
16806 rIS4
= (ib
>> 4) & 0xF;
16807 DIP("%s %s,%s,%s,%s\n",
16808 name
, nameYMMReg(rIS4
), nameYMMReg(rE
),
16809 nameYMMReg(rV
), nameYMMReg(rG
));
16811 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
16813 assign(vecE
, loadLE(Ity_V256
, mkexpr(addr
)));
16814 UChar ib
= getUChar(delta
);
16815 rIS4
= (ib
>> 4) & 0xF;
16816 DIP("%s %s,%s,%s,%s\n",
16817 name
, nameYMMReg(rIS4
), dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
16820 assign(vecV
, getYMMReg(rV
));
16821 assign(vecIS4
, getYMMReg(rIS4
));
16822 IRTemp res
= math_PBLENDVB_256( vecE
, vecV
, vecIS4
, gran
, opSAR128
);
16823 putYMMReg( rG
, mkexpr(res
) );
16827 static void finish_xTESTy ( IRTemp andV
, IRTemp andnV
, Int sign
)
16829 /* Set Z=1 iff (vecE & vecG) == 0
16830 Set C=1 iff (vecE & not vecG) == 0
16833 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16835 /* andV resp. andnV, reduced to 64-bit values, by or-ing the top
16836 and bottom 64-bits together. It relies on this trick:
16838 InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
16840 InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
16841 InterleaveHI64x2([a,b],[a,b]) == [a,a]
16843 and so the OR of the above 2 exprs produces
16844 [a OR b, a OR b], from which we simply take the lower half.
16846 IRTemp and64
= newTemp(Ity_I64
);
16847 IRTemp andn64
= newTemp(Ity_I64
);
16852 binop(Iop_InterleaveLO64x2
,
16853 mkexpr(andV
), mkexpr(andV
)),
16854 binop(Iop_InterleaveHI64x2
,
16855 mkexpr(andV
), mkexpr(andV
)))));
16860 binop(Iop_InterleaveLO64x2
,
16861 mkexpr(andnV
), mkexpr(andnV
)),
16862 binop(Iop_InterleaveHI64x2
,
16863 mkexpr(andnV
), mkexpr(andnV
)))));
16865 IRTemp z64
= newTemp(Ity_I64
);
16866 IRTemp c64
= newTemp(Ity_I64
);
16868 /* When only interested in the most significant bit, just shift
16869 arithmetically right and negate. */
16872 binop(Iop_Sar64
, mkexpr(and64
), mkU8(63))));
16876 binop(Iop_Sar64
, mkexpr(andn64
), mkU8(63))));
16879 /* When interested in bit 31 and bit 63, mask those bits and
16880 fallthrough into the PTEST handling. */
16881 IRTemp t0
= newTemp(Ity_I64
);
16882 IRTemp t1
= newTemp(Ity_I64
);
16883 IRTemp t2
= newTemp(Ity_I64
);
16884 assign(t0
, mkU64(0x8000000080000000ULL
));
16885 assign(t1
, binop(Iop_And64
, mkexpr(and64
), mkexpr(t0
)));
16886 assign(t2
, binop(Iop_And64
, mkexpr(andn64
), mkexpr(t0
)));
16890 /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
16891 slice out the Z and C bits conveniently. We use the standard
16892 trick all-zeroes -> all-zeroes, anything-else -> all-ones
16893 done by "(x | -x) >>s (word-size - 1)".
16899 binop(Iop_Sub64
, mkU64(0), mkexpr(and64
)),
16900 mkexpr(and64
)), mkU8(63))));
16906 binop(Iop_Sub64
, mkU64(0), mkexpr(andn64
)),
16907 mkexpr(andn64
)), mkU8(63))));
16910 /* And finally, slice out the Z and C flags and set the flags
16911 thunk to COPY for them. OSAP are set to zero. */
16912 IRTemp newOSZACP
= newTemp(Ity_I64
);
16915 binop(Iop_And64
, mkexpr(z64
), mkU64(AMD64G_CC_MASK_Z
)),
16916 binop(Iop_And64
, mkexpr(c64
), mkU64(AMD64G_CC_MASK_C
))));
16918 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(newOSZACP
)));
16919 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
16920 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
16921 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
16925 /* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD.
16926 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
16927 static Long
dis_xTESTy_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
16928 Long delta
, Bool isAvx
, Int sign
)
16930 IRTemp addr
= IRTemp_INVALID
;
16933 UChar modrm
= getUChar(delta
);
16934 UInt rG
= gregOfRexRM(pfx
, modrm
);
16935 IRTemp vecE
= newTemp(Ity_V128
);
16936 IRTemp vecG
= newTemp(Ity_V128
);
16938 if ( epartIsReg(modrm
) ) {
16939 UInt rE
= eregOfRexRM(pfx
, modrm
);
16940 assign(vecE
, getXMMReg(rE
));
16942 DIP( "%s%stest%s %s,%s\n",
16943 isAvx
? "v" : "", sign
== 0 ? "p" : "",
16944 sign
== 0 ? "" : sign
== 32 ? "ps" : "pd",
16945 nameXMMReg(rE
), nameXMMReg(rG
) );
16947 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
16949 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
16950 assign(vecE
, loadLE( Ity_V128
, mkexpr(addr
) ));
16952 DIP( "%s%stest%s %s,%s\n",
16953 isAvx
? "v" : "", sign
== 0 ? "p" : "",
16954 sign
== 0 ? "" : sign
== 32 ? "ps" : "pd",
16955 dis_buf
, nameXMMReg(rG
) );
16958 assign(vecG
, getXMMReg(rG
));
16960 /* Set Z=1 iff (vecE & vecG) == 0
16961 Set C=1 iff (vecE & not vecG) == 0
16964 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16965 IRTemp andV
= newTemp(Ity_V128
);
16966 IRTemp andnV
= newTemp(Ity_V128
);
16967 assign(andV
, binop(Iop_AndV128
, mkexpr(vecE
), mkexpr(vecG
)));
16968 assign(andnV
, binop(Iop_AndV128
,
16970 binop(Iop_XorV128
, mkexpr(vecG
),
16973 finish_xTESTy ( andV
, andnV
, sign
);
16978 /* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
16979 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
16980 static Long
dis_xTESTy_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
16981 Long delta
, Int sign
)
16983 IRTemp addr
= IRTemp_INVALID
;
16986 UChar modrm
= getUChar(delta
);
16987 UInt rG
= gregOfRexRM(pfx
, modrm
);
16988 IRTemp vecE
= newTemp(Ity_V256
);
16989 IRTemp vecG
= newTemp(Ity_V256
);
16991 if ( epartIsReg(modrm
) ) {
16992 UInt rE
= eregOfRexRM(pfx
, modrm
);
16993 assign(vecE
, getYMMReg(rE
));
16995 DIP( "v%stest%s %s,%s\n", sign
== 0 ? "p" : "",
16996 sign
== 0 ? "" : sign
== 32 ? "ps" : "pd",
16997 nameYMMReg(rE
), nameYMMReg(rG
) );
16999 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17000 assign(vecE
, loadLE( Ity_V256
, mkexpr(addr
) ));
17002 DIP( "v%stest%s %s,%s\n", sign
== 0 ? "p" : "",
17003 sign
== 0 ? "" : sign
== 32 ? "ps" : "pd",
17004 dis_buf
, nameYMMReg(rG
) );
17007 assign(vecG
, getYMMReg(rG
));
17009 /* Set Z=1 iff (vecE & vecG) == 0
17010 Set C=1 iff (vecE & not vecG) == 0
17013 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
17014 IRTemp andV
= newTemp(Ity_V256
);
17015 IRTemp andnV
= newTemp(Ity_V256
);
17016 assign(andV
, binop(Iop_AndV256
, mkexpr(vecE
), mkexpr(vecG
)));
17017 assign(andnV
, binop(Iop_AndV256
,
17018 mkexpr(vecE
), unop(Iop_NotV256
, mkexpr(vecG
))));
17020 IRTemp andVhi
= IRTemp_INVALID
;
17021 IRTemp andVlo
= IRTemp_INVALID
;
17022 IRTemp andnVhi
= IRTemp_INVALID
;
17023 IRTemp andnVlo
= IRTemp_INVALID
;
17024 breakupV256toV128s( andV
, &andVhi
, &andVlo
);
17025 breakupV256toV128s( andnV
, &andnVhi
, &andnVlo
);
17027 IRTemp andV128
= newTemp(Ity_V128
);
17028 IRTemp andnV128
= newTemp(Ity_V128
);
17029 assign( andV128
, binop( Iop_OrV128
, mkexpr(andVhi
), mkexpr(andVlo
) ) );
17030 assign( andnV128
, binop( Iop_OrV128
, mkexpr(andnVhi
), mkexpr(andnVlo
) ) );
17032 finish_xTESTy ( andV128
, andnV128
, sign
);
17037 /* Handles 128 and 256 bit versions of VCVTPH2PS. */
17038 static Long
dis_VCVTPH2PS ( const VexAbiInfo
* vbi
, Prefix pfx
,
17039 Long delta
, Bool is256bit
)
17041 /* This is a width-doubling load or reg-reg move, that does conversion on the
17042 transferred data. */
17043 UChar modrm
= getUChar(delta
);
17044 UInt rG
= gregOfRexRM(pfx
, modrm
);
17045 IRTemp srcE
= newTemp(is256bit
? Ity_V128
: Ity_I64
);
17047 if (epartIsReg(modrm
)) {
17048 UInt rE
= eregOfRexRM(pfx
, modrm
);
17049 assign(srcE
, is256bit
? unop(Iop_V256toV128_0
, getYMMReg(rE
))
17050 : unop(Iop_V128to64
, getXMMReg(rE
)));
17052 DIP("vcvtph2ps %s,%s\n", nameXMMReg(rE
),
17053 (is256bit
? nameYMMReg
: nameXMMReg
)(rG
));
17057 IRTemp addr
= disAMode(&alen
, vbi
, pfx
, delta
, dis_buf
, 0);
17058 // I don't think we need an alignment check here (not 100% sure tho.)
17059 assign(srcE
, loadLE(is256bit
? Ity_V128
: Ity_I64
, mkexpr(addr
)));
17061 DIP( "vcvtph2ps %s,%s\n", dis_buf
,
17062 (is256bit
? nameYMMReg
: nameXMMReg
)(rG
));
17065 IRExpr
* res
= unop(is256bit
? Iop_F16toF32x8
: Iop_F16toF32x4
, mkexpr(srcE
));
17066 (is256bit
? putYMMReg
: putYMMRegLoAndZU
)(rG
, res
);
17072 /* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */
17073 static Long
dis_PMOVxXBW_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17074 Long delta
, Bool isAvx
, Bool xIsZ
)
17076 IRTemp addr
= IRTemp_INVALID
;
17079 IRTemp srcVec
= newTemp(Ity_V128
);
17080 UChar modrm
= getUChar(delta
);
17081 const HChar
* mbV
= isAvx
? "v" : "";
17082 const HChar how
= xIsZ
? 'z' : 's';
17083 UInt rG
= gregOfRexRM(pfx
, modrm
);
17084 if ( epartIsReg(modrm
) ) {
17085 UInt rE
= eregOfRexRM(pfx
, modrm
);
17086 assign( srcVec
, getXMMReg(rE
) );
17088 DIP( "%spmov%cxbw %s,%s\n", mbV
, how
, nameXMMReg(rE
), nameXMMReg(rG
) );
17090 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17092 unop( Iop_64UtoV128
, loadLE( Ity_I64
, mkexpr(addr
) ) ) );
17094 DIP( "%spmov%cxbw %s,%s\n", mbV
, how
, dis_buf
, nameXMMReg(rG
) );
17098 = xIsZ
/* do math for either zero or sign extend */
17099 ? binop( Iop_InterleaveLO8x16
,
17100 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) )
17101 : binop( Iop_SarN16x8
,
17102 binop( Iop_ShlN16x8
,
17103 binop( Iop_InterleaveLO8x16
,
17104 IRExpr_Const( IRConst_V128(0) ),
17109 (isAvx
? putYMMRegLoAndZU
: putXMMReg
) ( rG
, res
);
17115 /* Handles 256 bit versions of PMOVZXBW and PMOVSXBW. */
17116 static Long
dis_PMOVxXBW_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17117 Long delta
, Bool xIsZ
)
17119 IRTemp addr
= IRTemp_INVALID
;
17122 IRTemp srcVec
= newTemp(Ity_V128
);
17123 UChar modrm
= getUChar(delta
);
17124 UChar how
= xIsZ
? 'z' : 's';
17125 UInt rG
= gregOfRexRM(pfx
, modrm
);
17126 if ( epartIsReg(modrm
) ) {
17127 UInt rE
= eregOfRexRM(pfx
, modrm
);
17128 assign( srcVec
, getXMMReg(rE
) );
17130 DIP( "vpmov%cxbw %s,%s\n", how
, nameXMMReg(rE
), nameYMMReg(rG
) );
17132 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17133 assign( srcVec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
17135 DIP( "vpmov%cxbw %s,%s\n", how
, dis_buf
, nameYMMReg(rG
) );
17138 /* First do zero extend. */
17140 = binop( Iop_V128HLtoV256
,
17141 binop( Iop_InterleaveHI8x16
,
17142 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ),
17143 binop( Iop_InterleaveLO8x16
,
17144 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ) );
17145 /* And if needed sign extension as well. */
17147 res
= binop( Iop_SarN16x16
,
17148 binop( Iop_ShlN16x16
, res
, mkU8(8) ), mkU8(8) );
17150 putYMMReg ( rG
, res
);
17156 static Long
dis_PMOVxXWD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17157 Long delta
, Bool isAvx
, Bool xIsZ
)
17159 IRTemp addr
= IRTemp_INVALID
;
17162 IRTemp srcVec
= newTemp(Ity_V128
);
17163 UChar modrm
= getUChar(delta
);
17164 const HChar
* mbV
= isAvx
? "v" : "";
17165 const HChar how
= xIsZ
? 'z' : 's';
17166 UInt rG
= gregOfRexRM(pfx
, modrm
);
17168 if ( epartIsReg(modrm
) ) {
17169 UInt rE
= eregOfRexRM(pfx
, modrm
);
17170 assign( srcVec
, getXMMReg(rE
) );
17172 DIP( "%spmov%cxwd %s,%s\n", mbV
, how
, nameXMMReg(rE
), nameXMMReg(rG
) );
17174 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17176 unop( Iop_64UtoV128
, loadLE( Ity_I64
, mkexpr(addr
) ) ) );
17178 DIP( "%spmov%cxwd %s,%s\n", mbV
, how
, dis_buf
, nameXMMReg(rG
) );
17182 = binop( Iop_InterleaveLO16x8
,
17183 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) );
17185 res
= binop(Iop_SarN32x4
,
17186 binop(Iop_ShlN32x4
, res
, mkU8(16)), mkU8(16));
17188 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
17189 ( gregOfRexRM(pfx
, modrm
), res
);
17195 static Long
dis_PMOVxXWD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17196 Long delta
, Bool xIsZ
)
17198 IRTemp addr
= IRTemp_INVALID
;
17201 IRTemp srcVec
= newTemp(Ity_V128
);
17202 UChar modrm
= getUChar(delta
);
17203 UChar how
= xIsZ
? 'z' : 's';
17204 UInt rG
= gregOfRexRM(pfx
, modrm
);
17206 if ( epartIsReg(modrm
) ) {
17207 UInt rE
= eregOfRexRM(pfx
, modrm
);
17208 assign( srcVec
, getXMMReg(rE
) );
17210 DIP( "vpmov%cxwd %s,%s\n", how
, nameXMMReg(rE
), nameYMMReg(rG
) );
17212 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17213 assign( srcVec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
17215 DIP( "vpmov%cxwd %s,%s\n", how
, dis_buf
, nameYMMReg(rG
) );
17219 = binop( Iop_V128HLtoV256
,
17220 binop( Iop_InterleaveHI16x8
,
17221 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ),
17222 binop( Iop_InterleaveLO16x8
,
17223 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ) );
17225 res
= binop(Iop_SarN32x8
,
17226 binop(Iop_ShlN32x8
, res
, mkU8(16)), mkU8(16));
17228 putYMMReg ( rG
, res
);
17234 static Long
dis_PMOVSXWQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17235 Long delta
, Bool isAvx
)
17237 IRTemp addr
= IRTemp_INVALID
;
17240 IRTemp srcBytes
= newTemp(Ity_I32
);
17241 UChar modrm
= getUChar(delta
);
17242 const HChar
* mbV
= isAvx
? "v" : "";
17243 UInt rG
= gregOfRexRM(pfx
, modrm
);
17245 if ( epartIsReg( modrm
) ) {
17246 UInt rE
= eregOfRexRM(pfx
, modrm
);
17247 assign( srcBytes
, getXMMRegLane32( rE
, 0 ) );
17249 DIP( "%spmovsxwq %s,%s\n", mbV
, nameXMMReg(rE
), nameXMMReg(rG
) );
17251 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17252 assign( srcBytes
, loadLE( Ity_I32
, mkexpr(addr
) ) );
17254 DIP( "%spmovsxwq %s,%s\n", mbV
, dis_buf
, nameXMMReg(rG
) );
17257 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
17258 ( rG
, binop( Iop_64HLtoV128
,
17260 unop( Iop_32HIto16
, mkexpr(srcBytes
) ) ),
17262 unop( Iop_32to16
, mkexpr(srcBytes
) ) ) ) );
17267 static Long
dis_PMOVSXWQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
)
17269 IRTemp addr
= IRTemp_INVALID
;
17272 IRTemp srcBytes
= newTemp(Ity_I64
);
17273 UChar modrm
= getUChar(delta
);
17274 UInt rG
= gregOfRexRM(pfx
, modrm
);
17275 IRTemp s3
, s2
, s1
, s0
;
17276 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
17278 if ( epartIsReg( modrm
) ) {
17279 UInt rE
= eregOfRexRM(pfx
, modrm
);
17280 assign( srcBytes
, getXMMRegLane64( rE
, 0 ) );
17282 DIP( "vpmovsxwq %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
) );
17284 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17285 assign( srcBytes
, loadLE( Ity_I64
, mkexpr(addr
) ) );
17287 DIP( "vpmovsxwq %s,%s\n", dis_buf
, nameYMMReg(rG
) );
17290 breakup64to16s( srcBytes
, &s3
, &s2
, &s1
, &s0
);
17291 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
17292 binop( Iop_64HLtoV128
,
17293 unop( Iop_16Sto64
, mkexpr(s3
) ),
17294 unop( Iop_16Sto64
, mkexpr(s2
) ) ),
17295 binop( Iop_64HLtoV128
,
17296 unop( Iop_16Sto64
, mkexpr(s1
) ),
17297 unop( Iop_16Sto64
, mkexpr(s0
) ) ) ) );
17302 static Long
dis_PMOVZXWQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17303 Long delta
, Bool isAvx
)
17305 IRTemp addr
= IRTemp_INVALID
;
17308 IRTemp srcVec
= newTemp(Ity_V128
);
17309 UChar modrm
= getUChar(delta
);
17310 const HChar
* mbV
= isAvx
? "v" : "";
17311 UInt rG
= gregOfRexRM(pfx
, modrm
);
17313 if ( epartIsReg( modrm
) ) {
17314 UInt rE
= eregOfRexRM(pfx
, modrm
);
17315 assign( srcVec
, getXMMReg(rE
) );
17317 DIP( "%spmovzxwq %s,%s\n", mbV
, nameXMMReg(rE
), nameXMMReg(rG
) );
17319 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17321 unop( Iop_32UtoV128
, loadLE( Ity_I32
, mkexpr(addr
) ) ) );
17323 DIP( "%spmovzxwq %s,%s\n", mbV
, dis_buf
, nameXMMReg(rG
) );
17326 IRTemp zeroVec
= newTemp( Ity_V128
);
17327 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17329 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
17330 ( rG
, binop( Iop_InterleaveLO16x8
,
17332 binop( Iop_InterleaveLO16x8
,
17333 mkexpr(zeroVec
), mkexpr(srcVec
) ) ) );
17338 static Long
dis_PMOVZXWQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17341 IRTemp addr
= IRTemp_INVALID
;
17344 IRTemp srcVec
= newTemp(Ity_V128
);
17345 UChar modrm
= getUChar(delta
);
17346 UInt rG
= gregOfRexRM(pfx
, modrm
);
17348 if ( epartIsReg( modrm
) ) {
17349 UInt rE
= eregOfRexRM(pfx
, modrm
);
17350 assign( srcVec
, getXMMReg(rE
) );
17352 DIP( "vpmovzxwq %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
) );
17354 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17356 unop( Iop_64UtoV128
, loadLE( Ity_I64
, mkexpr(addr
) ) ) );
17358 DIP( "vpmovzxwq %s,%s\n", dis_buf
, nameYMMReg(rG
) );
17361 IRTemp zeroVec
= newTemp( Ity_V128
);
17362 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17364 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
17365 binop( Iop_InterleaveHI16x8
,
17367 binop( Iop_InterleaveLO16x8
,
17368 mkexpr(zeroVec
), mkexpr(srcVec
) ) ),
17369 binop( Iop_InterleaveLO16x8
,
17371 binop( Iop_InterleaveLO16x8
,
17372 mkexpr(zeroVec
), mkexpr(srcVec
) ) ) ) );
17377 /* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */
17378 static Long
dis_PMOVxXDQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17379 Long delta
, Bool isAvx
, Bool xIsZ
)
17381 IRTemp addr
= IRTemp_INVALID
;
17384 IRTemp srcI64
= newTemp(Ity_I64
);
17385 IRTemp srcVec
= newTemp(Ity_V128
);
17386 UChar modrm
= getUChar(delta
);
17387 const HChar
* mbV
= isAvx
? "v" : "";
17388 const HChar how
= xIsZ
? 'z' : 's';
17389 UInt rG
= gregOfRexRM(pfx
, modrm
);
17390 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
17391 thing in a V128, with arbitrary junk in the top 64 bits. Use
17392 one or both of them and let iropt clean up afterwards (as
17394 if ( epartIsReg(modrm
) ) {
17395 UInt rE
= eregOfRexRM(pfx
, modrm
);
17396 assign( srcVec
, getXMMReg(rE
) );
17397 assign( srcI64
, unop(Iop_V128to64
, mkexpr(srcVec
)) );
17399 DIP( "%spmov%cxdq %s,%s\n", mbV
, how
, nameXMMReg(rE
), nameXMMReg(rG
) );
17401 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17402 assign( srcI64
, loadLE(Ity_I64
, mkexpr(addr
)) );
17403 assign( srcVec
, unop( Iop_64UtoV128
, mkexpr(srcI64
)) );
17405 DIP( "%spmov%cxdq %s,%s\n", mbV
, how
, dis_buf
, nameXMMReg(rG
) );
17409 = xIsZ
/* do math for either zero or sign extend */
17410 ? binop( Iop_InterleaveLO32x4
,
17411 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) )
17412 : binop( Iop_64HLtoV128
,
17414 unop( Iop_64HIto32
, mkexpr(srcI64
) ) ),
17416 unop( Iop_64to32
, mkexpr(srcI64
) ) ) );
17418 (isAvx
? putYMMRegLoAndZU
: putXMMReg
) ( rG
, res
);
17424 /* Handles 256 bit versions of PMOVZXDQ and PMOVSXDQ. */
17425 static Long
dis_PMOVxXDQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17426 Long delta
, Bool xIsZ
)
17428 IRTemp addr
= IRTemp_INVALID
;
17431 IRTemp srcVec
= newTemp(Ity_V128
);
17432 UChar modrm
= getUChar(delta
);
17433 UChar how
= xIsZ
? 'z' : 's';
17434 UInt rG
= gregOfRexRM(pfx
, modrm
);
17435 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
17436 thing in a V128, with arbitrary junk in the top 64 bits. Use
17437 one or both of them and let iropt clean up afterwards (as
17439 if ( epartIsReg(modrm
) ) {
17440 UInt rE
= eregOfRexRM(pfx
, modrm
);
17441 assign( srcVec
, getXMMReg(rE
) );
17443 DIP( "vpmov%cxdq %s,%s\n", how
, nameXMMReg(rE
), nameYMMReg(rG
) );
17445 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17446 assign( srcVec
, loadLE(Ity_V128
, mkexpr(addr
)) );
17448 DIP( "vpmov%cxdq %s,%s\n", how
, dis_buf
, nameYMMReg(rG
) );
17453 res
= binop( Iop_V128HLtoV256
,
17454 binop( Iop_InterleaveHI32x4
,
17455 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ),
17456 binop( Iop_InterleaveLO32x4
,
17457 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec
) ) );
17459 IRTemp s3
, s2
, s1
, s0
;
17460 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
17461 breakupV128to32s( srcVec
, &s3
, &s2
, &s1
, &s0
);
17462 res
= binop( Iop_V128HLtoV256
,
17463 binop( Iop_64HLtoV128
,
17464 unop( Iop_32Sto64
, mkexpr(s3
) ),
17465 unop( Iop_32Sto64
, mkexpr(s2
) ) ),
17466 binop( Iop_64HLtoV128
,
17467 unop( Iop_32Sto64
, mkexpr(s1
) ),
17468 unop( Iop_32Sto64
, mkexpr(s0
) ) ) );
17471 putYMMReg ( rG
, res
);
17477 /* Handles 128 bit versions of PMOVZXBD and PMOVSXBD. */
17478 static Long
dis_PMOVxXBD_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17479 Long delta
, Bool isAvx
, Bool xIsZ
)
17481 IRTemp addr
= IRTemp_INVALID
;
17484 IRTemp srcVec
= newTemp(Ity_V128
);
17485 UChar modrm
= getUChar(delta
);
17486 const HChar
* mbV
= isAvx
? "v" : "";
17487 const HChar how
= xIsZ
? 'z' : 's';
17488 UInt rG
= gregOfRexRM(pfx
, modrm
);
17489 if ( epartIsReg(modrm
) ) {
17490 UInt rE
= eregOfRexRM(pfx
, modrm
);
17491 assign( srcVec
, getXMMReg(rE
) );
17493 DIP( "%spmov%cxbd %s,%s\n", mbV
, how
, nameXMMReg(rE
), nameXMMReg(rG
) );
17495 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17497 unop( Iop_32UtoV128
, loadLE( Ity_I32
, mkexpr(addr
) ) ) );
17499 DIP( "%spmov%cxbd %s,%s\n", mbV
, how
, dis_buf
, nameXMMReg(rG
) );
17502 IRTemp zeroVec
= newTemp(Ity_V128
);
17503 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17506 = binop(Iop_InterleaveLO8x16
,
17508 binop(Iop_InterleaveLO8x16
,
17509 mkexpr(zeroVec
), mkexpr(srcVec
)));
17511 res
= binop(Iop_SarN32x4
,
17512 binop(Iop_ShlN32x4
, res
, mkU8(24)), mkU8(24));
17514 (isAvx
? putYMMRegLoAndZU
: putXMMReg
) ( rG
, res
);
17520 /* Handles 256 bit versions of PMOVZXBD and PMOVSXBD. */
17521 static Long
dis_PMOVxXBD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17522 Long delta
, Bool xIsZ
)
17524 IRTemp addr
= IRTemp_INVALID
;
17527 IRTemp srcVec
= newTemp(Ity_V128
);
17528 UChar modrm
= getUChar(delta
);
17529 UChar how
= xIsZ
? 'z' : 's';
17530 UInt rG
= gregOfRexRM(pfx
, modrm
);
17531 if ( epartIsReg(modrm
) ) {
17532 UInt rE
= eregOfRexRM(pfx
, modrm
);
17533 assign( srcVec
, getXMMReg(rE
) );
17535 DIP( "vpmov%cxbd %s,%s\n", how
, nameXMMReg(rE
), nameYMMReg(rG
) );
17537 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17539 unop( Iop_64UtoV128
, loadLE( Ity_I64
, mkexpr(addr
) ) ) );
17541 DIP( "vpmov%cxbd %s,%s\n", how
, dis_buf
, nameYMMReg(rG
) );
17544 IRTemp zeroVec
= newTemp(Ity_V128
);
17545 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17548 = binop( Iop_V128HLtoV256
,
17549 binop(Iop_InterleaveHI8x16
,
17551 binop(Iop_InterleaveLO8x16
,
17552 mkexpr(zeroVec
), mkexpr(srcVec
)) ),
17553 binop(Iop_InterleaveLO8x16
,
17555 binop(Iop_InterleaveLO8x16
,
17556 mkexpr(zeroVec
), mkexpr(srcVec
)) ) );
17558 res
= binop(Iop_SarN32x8
,
17559 binop(Iop_ShlN32x8
, res
, mkU8(24)), mkU8(24));
17561 putYMMReg ( rG
, res
);
17567 /* Handles 128 bit versions of PMOVSXBQ. */
17568 static Long
dis_PMOVSXBQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17569 Long delta
, Bool isAvx
)
17571 IRTemp addr
= IRTemp_INVALID
;
17574 IRTemp srcBytes
= newTemp(Ity_I16
);
17575 UChar modrm
= getUChar(delta
);
17576 const HChar
* mbV
= isAvx
? "v" : "";
17577 UInt rG
= gregOfRexRM(pfx
, modrm
);
17578 if ( epartIsReg(modrm
) ) {
17579 UInt rE
= eregOfRexRM(pfx
, modrm
);
17580 assign( srcBytes
, getXMMRegLane16( rE
, 0 ) );
17582 DIP( "%spmovsxbq %s,%s\n", mbV
, nameXMMReg(rE
), nameXMMReg(rG
) );
17584 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17585 assign( srcBytes
, loadLE( Ity_I16
, mkexpr(addr
) ) );
17587 DIP( "%spmovsxbq %s,%s\n", mbV
, dis_buf
, nameXMMReg(rG
) );
17590 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
17591 ( rG
, binop( Iop_64HLtoV128
,
17593 unop( Iop_16HIto8
, mkexpr(srcBytes
) ) ),
17595 unop( Iop_16to8
, mkexpr(srcBytes
) ) ) ) );
17600 /* Handles 256 bit versions of PMOVSXBQ. */
17601 static Long
dis_PMOVSXBQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17604 IRTemp addr
= IRTemp_INVALID
;
17607 IRTemp srcBytes
= newTemp(Ity_I32
);
17608 UChar modrm
= getUChar(delta
);
17609 UInt rG
= gregOfRexRM(pfx
, modrm
);
17610 if ( epartIsReg(modrm
) ) {
17611 UInt rE
= eregOfRexRM(pfx
, modrm
);
17612 assign( srcBytes
, getXMMRegLane32( rE
, 0 ) );
17614 DIP( "vpmovsxbq %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
) );
17616 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17617 assign( srcBytes
, loadLE( Ity_I32
, mkexpr(addr
) ) );
17619 DIP( "vpmovsxbq %s,%s\n", dis_buf
, nameYMMReg(rG
) );
17623 ( rG
, binop( Iop_V128HLtoV256
,
17624 binop( Iop_64HLtoV128
,
17627 unop( Iop_32HIto16
,
17628 mkexpr(srcBytes
) ) ) ),
17631 unop( Iop_32HIto16
,
17632 mkexpr(srcBytes
) ) ) ) ),
17633 binop( Iop_64HLtoV128
,
17637 mkexpr(srcBytes
) ) ) ),
17641 mkexpr(srcBytes
) ) ) ) ) ) );
17646 /* Handles 128 bit versions of PMOVZXBQ. */
17647 static Long
dis_PMOVZXBQ_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17648 Long delta
, Bool isAvx
)
17650 IRTemp addr
= IRTemp_INVALID
;
17653 IRTemp srcVec
= newTemp(Ity_V128
);
17654 UChar modrm
= getUChar(delta
);
17655 const HChar
* mbV
= isAvx
? "v" : "";
17656 UInt rG
= gregOfRexRM(pfx
, modrm
);
17657 if ( epartIsReg(modrm
) ) {
17658 UInt rE
= eregOfRexRM(pfx
, modrm
);
17659 assign( srcVec
, getXMMReg(rE
) );
17661 DIP( "%spmovzxbq %s,%s\n", mbV
, nameXMMReg(rE
), nameXMMReg(rG
) );
17663 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17665 unop( Iop_32UtoV128
,
17666 unop( Iop_16Uto32
, loadLE( Ity_I16
, mkexpr(addr
) ))));
17668 DIP( "%spmovzxbq %s,%s\n", mbV
, dis_buf
, nameXMMReg(rG
) );
17671 IRTemp zeroVec
= newTemp(Ity_V128
);
17672 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17674 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
17675 ( rG
, binop( Iop_InterleaveLO8x16
,
17677 binop( Iop_InterleaveLO8x16
,
17679 binop( Iop_InterleaveLO8x16
,
17680 mkexpr(zeroVec
), mkexpr(srcVec
) ) ) ) );
17685 /* Handles 256 bit versions of PMOVZXBQ. */
17686 static Long
dis_PMOVZXBQ_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17689 IRTemp addr
= IRTemp_INVALID
;
17692 IRTemp srcVec
= newTemp(Ity_V128
);
17693 UChar modrm
= getUChar(delta
);
17694 UInt rG
= gregOfRexRM(pfx
, modrm
);
17695 if ( epartIsReg(modrm
) ) {
17696 UInt rE
= eregOfRexRM(pfx
, modrm
);
17697 assign( srcVec
, getXMMReg(rE
) );
17699 DIP( "vpmovzxbq %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
) );
17701 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17703 unop( Iop_32UtoV128
, loadLE( Ity_I32
, mkexpr(addr
) )));
17705 DIP( "vpmovzxbq %s,%s\n", dis_buf
, nameYMMReg(rG
) );
17708 IRTemp zeroVec
= newTemp(Ity_V128
);
17709 assign( zeroVec
, IRExpr_Const( IRConst_V128(0) ) );
17712 ( rG
, binop( Iop_V128HLtoV256
,
17713 binop( Iop_InterleaveHI8x16
,
17715 binop( Iop_InterleaveLO8x16
,
17717 binop( Iop_InterleaveLO8x16
,
17718 mkexpr(zeroVec
), mkexpr(srcVec
) ) ) ),
17719 binop( Iop_InterleaveLO8x16
,
17721 binop( Iop_InterleaveLO8x16
,
17723 binop( Iop_InterleaveLO8x16
,
17724 mkexpr(zeroVec
), mkexpr(srcVec
) ) ) )
17730 static Long
dis_PHMINPOSUW_128 ( const VexAbiInfo
* vbi
, Prefix pfx
,
17731 Long delta
, Bool isAvx
)
17733 IRTemp addr
= IRTemp_INVALID
;
17736 UChar modrm
= getUChar(delta
);
17737 const HChar
* mbV
= isAvx
? "v" : "";
17738 IRTemp sV
= newTemp(Ity_V128
);
17739 IRTemp sHi
= newTemp(Ity_I64
);
17740 IRTemp sLo
= newTemp(Ity_I64
);
17741 IRTemp dLo
= newTemp(Ity_I64
);
17742 UInt rG
= gregOfRexRM(pfx
,modrm
);
17743 if (epartIsReg(modrm
)) {
17744 UInt rE
= eregOfRexRM(pfx
,modrm
);
17745 assign( sV
, getXMMReg(rE
) );
17747 DIP("%sphminposuw %s,%s\n", mbV
, nameXMMReg(rE
), nameXMMReg(rG
));
17749 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17751 gen_SIGNAL_if_not_16_aligned(vbi
, addr
);
17752 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
17754 DIP("%sphminposuw %s,%s\n", mbV
, dis_buf
, nameXMMReg(rG
));
17756 assign( sHi
, unop(Iop_V128HIto64
, mkexpr(sV
)) );
17757 assign( sLo
, unop(Iop_V128to64
, mkexpr(sV
)) );
17758 assign( dLo
, mkIRExprCCall(
17759 Ity_I64
, 0/*regparms*/,
17760 "amd64g_calculate_sse_phminposuw",
17761 &amd64g_calculate_sse_phminposuw
,
17762 mkIRExprVec_2( mkexpr(sLo
), mkexpr(sHi
) )
17764 (isAvx
? putYMMRegLoAndZU
: putXMMReg
)
17765 (rG
, unop(Iop_64UtoV128
, mkexpr(dLo
)));
17770 static Long
dis_AESx ( const VexAbiInfo
* vbi
, Prefix pfx
,
17771 Long delta
, Bool isAvx
, UChar opc
)
17773 IRTemp addr
= IRTemp_INVALID
;
17776 UChar modrm
= getUChar(delta
);
17777 UInt rG
= gregOfRexRM(pfx
, modrm
);
17779 UInt regNoR
= (isAvx
&& opc
!= 0xDB) ? getVexNvvvv(pfx
) : rG
;
17781 /* This is a nasty kludge. We need to pass 2 x V128 to the
17782 helper. Since we can't do that, use a dirty
17783 helper to compute the results directly from the XMM regs in
17784 the guest state. That means for the memory case, we need to
17785 move the left operand into a pseudo-register (XMM16, let's
17787 if (epartIsReg(modrm
)) {
17788 regNoL
= eregOfRexRM(pfx
, modrm
);
17791 regNoL
= 16; /* use XMM16 as an intermediary */
17792 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17793 /* alignment check needed ???? */
17794 stmt( IRStmt_Put( OFFB_YMM16
, loadLE(Ity_V128
, mkexpr(addr
)) ));
17798 void* fn
= &amd64g_dirtyhelper_AES
;
17799 const HChar
* nm
= "amd64g_dirtyhelper_AES";
17801 /* Round up the arguments. Note that this is a kludge -- the
17802 use of mkU64 rather than mkIRExpr_HWord implies the
17803 assumption that the host's word size is 64-bit. */
17804 UInt gstOffD
= ymmGuestRegOffset(rG
);
17805 UInt gstOffL
= regNoL
== 16 ? OFFB_YMM16
: ymmGuestRegOffset(regNoL
);
17806 UInt gstOffR
= ymmGuestRegOffset(regNoR
);
17807 IRExpr
* opc4
= mkU64(opc
);
17808 IRExpr
* gstOffDe
= mkU64(gstOffD
);
17809 IRExpr
* gstOffLe
= mkU64(gstOffL
);
17810 IRExpr
* gstOffRe
= mkU64(gstOffR
);
17812 = mkIRExprVec_5( IRExpr_GSPTR(), opc4
, gstOffDe
, gstOffLe
, gstOffRe
);
17814 IRDirty
* d
= unsafeIRDirty_0_N( 0/*regparms*/, nm
, fn
, args
);
17815 /* It's not really a dirty call, but we can't use the clean helper
17816 mechanism here for the very lame reason that we can't pass 2 x
17817 V128s by value to a helper. Hence this roundabout scheme. */
17819 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
17820 /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers, and writes
17821 the second for !isAvx or the third for isAvx.
17822 AESIMC (0xDB) reads the first register, and writes the second. */
17823 d
->fxState
[0].fx
= Ifx_Read
;
17824 d
->fxState
[0].offset
= gstOffL
;
17825 d
->fxState
[0].size
= sizeof(U128
);
17826 d
->fxState
[1].offset
= gstOffR
;
17827 d
->fxState
[1].size
= sizeof(U128
);
17829 d
->fxState
[1].fx
= Ifx_Write
;
17830 else if (!isAvx
|| rG
== regNoR
)
17831 d
->fxState
[1].fx
= Ifx_Modify
;
17833 d
->fxState
[1].fx
= Ifx_Read
;
17835 d
->fxState
[2].fx
= Ifx_Write
;
17836 d
->fxState
[2].offset
= gstOffD
;
17837 d
->fxState
[2].size
= sizeof(U128
);
17840 stmt( IRStmt_Dirty(d
) );
17842 const HChar
* opsuf
;
17844 case 0xDC: opsuf
= "enc"; break;
17845 case 0XDD: opsuf
= "enclast"; break;
17846 case 0xDE: opsuf
= "dec"; break;
17847 case 0xDF: opsuf
= "declast"; break;
17848 case 0xDB: opsuf
= "imc"; break;
17849 default: vassert(0);
17851 DIP("%saes%s %s,%s%s%s\n", isAvx
? "v" : "", opsuf
,
17852 (regNoL
== 16 ? dis_buf
: nameXMMReg(regNoL
)),
17853 nameXMMReg(regNoR
),
17854 (isAvx
&& opc
!= 0xDB) ? "," : "",
17855 (isAvx
&& opc
!= 0xDB) ? nameXMMReg(rG
) : "");
17858 putYMMRegLane128( rG
, 1, mkV128(0) );
17862 static Long
dis_AESKEYGENASSIST ( const VexAbiInfo
* vbi
, Prefix pfx
,
17863 Long delta
, Bool isAvx
)
17865 IRTemp addr
= IRTemp_INVALID
;
17868 UChar modrm
= getUChar(delta
);
17870 UInt regNoR
= gregOfRexRM(pfx
, modrm
);
17873 /* This is a nasty kludge. See AESENC et al. instructions. */
17874 modrm
= getUChar(delta
);
17875 if (epartIsReg(modrm
)) {
17876 regNoL
= eregOfRexRM(pfx
, modrm
);
17877 imm
= getUChar(delta
+1);
17880 regNoL
= 16; /* use XMM16 as an intermediary */
17881 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
17882 /* alignment check ???? . */
17883 stmt( IRStmt_Put( OFFB_YMM16
, loadLE(Ity_V128
, mkexpr(addr
)) ));
17884 imm
= getUChar(delta
+alen
);
17888 /* Who ya gonna call? Presumably not Ghostbusters. */
17889 void* fn
= &amd64g_dirtyhelper_AESKEYGENASSIST
;
17890 const HChar
* nm
= "amd64g_dirtyhelper_AESKEYGENASSIST";
17892 /* Round up the arguments. Note that this is a kludge -- the
17893 use of mkU64 rather than mkIRExpr_HWord implies the
17894 assumption that the host's word size is 64-bit. */
17895 UInt gstOffL
= regNoL
== 16 ? OFFB_YMM16
: ymmGuestRegOffset(regNoL
);
17896 UInt gstOffR
= ymmGuestRegOffset(regNoR
);
17898 IRExpr
* imme
= mkU64(imm
& 0xFF);
17899 IRExpr
* gstOffLe
= mkU64(gstOffL
);
17900 IRExpr
* gstOffRe
= mkU64(gstOffR
);
17902 = mkIRExprVec_4( IRExpr_GSPTR(), imme
, gstOffLe
, gstOffRe
);
17904 IRDirty
* d
= unsafeIRDirty_0_N( 0/*regparms*/, nm
, fn
, args
);
17905 /* It's not really a dirty call, but we can't use the clean helper
17906 mechanism here for the very lame reason that we can't pass 2 x
17907 V128s by value to a helper. Hence this roundabout scheme. */
17909 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
17910 d
->fxState
[0].fx
= Ifx_Read
;
17911 d
->fxState
[0].offset
= gstOffL
;
17912 d
->fxState
[0].size
= sizeof(U128
);
17913 d
->fxState
[1].fx
= Ifx_Write
;
17914 d
->fxState
[1].offset
= gstOffR
;
17915 d
->fxState
[1].size
= sizeof(U128
);
17916 stmt( IRStmt_Dirty(d
) );
17918 DIP("%saeskeygenassist $%x,%s,%s\n", isAvx
? "v" : "", (UInt
)imm
,
17919 (regNoL
== 16 ? dis_buf
: nameXMMReg(regNoL
)),
17920 nameXMMReg(regNoR
));
17922 putYMMRegLane128( regNoR
, 1, mkV128(0) );
17927 __attribute__((noinline
))
17929 Long
dis_ESC_0F38__SSE4 ( Bool
* decode_OK
,
17930 const VexAbiInfo
* vbi
,
17931 Prefix pfx
, Int sz
, Long deltaIN
)
17933 IRTemp addr
= IRTemp_INVALID
;
17938 *decode_OK
= False
;
17940 Long delta
= deltaIN
;
17941 UChar opc
= getUChar(delta
);
17948 /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran)
17949 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran)
17950 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran)
17951 Blend at various granularities, with XMM0 (implicit operand)
17952 providing the controlling mask.
17954 if (have66noF2noF3(pfx
) && sz
== 2) {
17955 modrm
= getUChar(delta
);
17957 const HChar
* nm
= NULL
;
17959 IROp opSAR
= Iop_INVALID
;
17962 nm
= "pblendvb"; gran
= 1; opSAR
= Iop_SarN8x16
;
17965 nm
= "blendvps"; gran
= 4; opSAR
= Iop_SarN32x4
;
17968 nm
= "blendvpd"; gran
= 8; opSAR
= Iop_SarN64x2
;
17973 IRTemp vecE
= newTemp(Ity_V128
);
17974 IRTemp vecG
= newTemp(Ity_V128
);
17975 IRTemp vec0
= newTemp(Ity_V128
);
17977 if ( epartIsReg(modrm
) ) {
17978 assign(vecE
, getXMMReg(eregOfRexRM(pfx
, modrm
)));
17980 DIP( "%s %s,%s\n", nm
,
17981 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
17982 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
17984 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
17985 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
17986 assign(vecE
, loadLE( Ity_V128
, mkexpr(addr
) ));
17988 DIP( "%s %s,%s\n", nm
,
17989 dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
17992 assign(vecG
, getXMMReg(gregOfRexRM(pfx
, modrm
)));
17993 assign(vec0
, getXMMReg(0));
17995 IRTemp res
= math_PBLENDVB_128( vecE
, vecG
, vec0
, gran
, opSAR
);
17996 putXMMReg(gregOfRexRM(pfx
, modrm
), mkexpr(res
));
17998 goto decode_success
;
18003 /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128
18004 Logical compare (set ZF and CF from AND/ANDN of the operands) */
18005 if (have66noF2noF3(pfx
)
18006 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
18007 delta
= dis_xTESTy_128( vbi
, pfx
, delta
, False
/*!isAvx*/, 0 );
18008 goto decode_success
;
18013 /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64
18014 Packed Move with Sign Extend from Byte to Word (XMM) */
18015 if (have66noF2noF3(pfx
) && sz
== 2) {
18016 delta
= dis_PMOVxXBW_128( vbi
, pfx
, delta
,
18017 False
/*!isAvx*/, False
/*!xIsZ*/ );
18018 goto decode_success
;
18023 /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32
18024 Packed Move with Sign Extend from Byte to DWord (XMM) */
18025 if (have66noF2noF3(pfx
) && sz
== 2) {
18026 delta
= dis_PMOVxXBD_128( vbi
, pfx
, delta
,
18027 False
/*!isAvx*/, False
/*!xIsZ*/ );
18028 goto decode_success
;
18033 /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
18034 Packed Move with Sign Extend from Byte to QWord (XMM) */
18035 if (have66noF2noF3(pfx
) && sz
== 2) {
18036 delta
= dis_PMOVSXBQ_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
18037 goto decode_success
;
18042 /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64
18043 Packed Move with Sign Extend from Word to DWord (XMM) */
18044 if (have66noF2noF3(pfx
) && sz
== 2) {
18045 delta
= dis_PMOVxXWD_128(vbi
, pfx
, delta
,
18046 False
/*!isAvx*/, False
/*!xIsZ*/);
18047 goto decode_success
;
18052 /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
18053 Packed Move with Sign Extend from Word to QWord (XMM) */
18054 if (have66noF2noF3(pfx
) && sz
== 2) {
18055 delta
= dis_PMOVSXWQ_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
18056 goto decode_success
;
18061 /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
18062 Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
18063 if (have66noF2noF3(pfx
) && sz
== 2) {
18064 delta
= dis_PMOVxXDQ_128( vbi
, pfx
, delta
,
18065 False
/*!isAvx*/, False
/*!xIsZ*/ );
18066 goto decode_success
;
18071 /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-lanes
18072 0 x 0 to form lower 64-bit half and lanes 2 x 2 to form upper
18074 /* This is a really poor translation -- could be improved if
18075 performance critical. It's a copy-paste of PMULUDQ, too. */
18076 if (have66noF2noF3(pfx
) && sz
== 2) {
18077 IRTemp sV
= newTemp(Ity_V128
);
18078 IRTemp dV
= newTemp(Ity_V128
);
18079 modrm
= getUChar(delta
);
18080 UInt rG
= gregOfRexRM(pfx
,modrm
);
18081 assign( dV
, getXMMReg(rG
) );
18082 if (epartIsReg(modrm
)) {
18083 UInt rE
= eregOfRexRM(pfx
,modrm
);
18084 assign( sV
, getXMMReg(rE
) );
18086 DIP("pmuldq %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
18088 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
18089 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
18091 DIP("pmuldq %s,%s\n", dis_buf
, nameXMMReg(rG
));
18094 putXMMReg( rG
, mkexpr(math_PMULDQ_128( dV
, sV
)) );
18095 goto decode_success
;
18100 /* 66 0F 38 29 = PCMPEQQ
18101 64x2 equality comparison */
18102 if (have66noF2noF3(pfx
) && sz
== 2) {
18103 /* FIXME: this needs an alignment check */
18104 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
18105 "pcmpeqq", Iop_CmpEQ64x2
, False
);
18106 goto decode_success
;
18111 /* 66 0F 38 2A = MOVNTDQA
18112 "non-temporal" "streaming" load
18113 Handle like MOVDQA but only memory operand is allowed */
18114 if (have66noF2noF3(pfx
) && sz
== 2) {
18115 modrm
= getUChar(delta
);
18116 if (!epartIsReg(modrm
)) {
18117 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
18118 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
18119 putXMMReg( gregOfRexRM(pfx
,modrm
),
18120 loadLE(Ity_V128
, mkexpr(addr
)) );
18121 DIP("movntdqa %s,%s\n", dis_buf
,
18122 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
18124 goto decode_success
;
18130 /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
18131 2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
18132 if (have66noF2noF3(pfx
) && sz
== 2) {
18134 modrm
= getUChar(delta
);
18136 IRTemp argL
= newTemp(Ity_V128
);
18137 IRTemp argR
= newTemp(Ity_V128
);
18139 if ( epartIsReg(modrm
) ) {
18140 assign( argL
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
18142 DIP( "packusdw %s,%s\n",
18143 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
18144 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
18146 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
18147 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
18148 assign( argL
, loadLE( Ity_V128
, mkexpr(addr
) ));
18150 DIP( "packusdw %s,%s\n",
18151 dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
18154 assign(argR
, getXMMReg( gregOfRexRM(pfx
, modrm
) ));
18156 putXMMReg( gregOfRexRM(pfx
, modrm
),
18157 binop( Iop_QNarrowBin32Sto16Ux8
,
18158 mkexpr(argL
), mkexpr(argR
)) );
18160 goto decode_success
;
18165 /* 66 0F 38 30 /r = PMOVZXBW xmm1, xmm2/m64
18166 Packed Move with Zero Extend from Byte to Word (XMM) */
18167 if (have66noF2noF3(pfx
) && sz
== 2) {
18168 delta
= dis_PMOVxXBW_128( vbi
, pfx
, delta
,
18169 False
/*!isAvx*/, True
/*xIsZ*/ );
18170 goto decode_success
;
18175 /* 66 0F 38 31 /r = PMOVZXBD xmm1, xmm2/m32
18176 Packed Move with Zero Extend from Byte to DWord (XMM) */
18177 if (have66noF2noF3(pfx
) && sz
== 2) {
18178 delta
= dis_PMOVxXBD_128( vbi
, pfx
, delta
,
18179 False
/*!isAvx*/, True
/*xIsZ*/ );
18180 goto decode_success
;
18185 /* 66 0F 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
18186 Packed Move with Zero Extend from Byte to QWord (XMM) */
18187 if (have66noF2noF3(pfx
) && sz
== 2) {
18188 delta
= dis_PMOVZXBQ_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
18189 goto decode_success
;
18194 /* 66 0F 38 33 /r = PMOVZXWD xmm1, xmm2/m64
18195 Packed Move with Zero Extend from Word to DWord (XMM) */
18196 if (have66noF2noF3(pfx
) && sz
== 2) {
18197 delta
= dis_PMOVxXWD_128( vbi
, pfx
, delta
,
18198 False
/*!isAvx*/, True
/*xIsZ*/ );
18199 goto decode_success
;
18204 /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
18205 Packed Move with Zero Extend from Word to QWord (XMM) */
18206 if (have66noF2noF3(pfx
) && sz
== 2) {
18207 delta
= dis_PMOVZXWQ_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
18208 goto decode_success
;
18213 /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
18214 Packed Move with Zero Extend from DWord to QWord (XMM) */
18215 if (have66noF2noF3(pfx
) && sz
== 2) {
18216 delta
= dis_PMOVxXDQ_128( vbi
, pfx
, delta
,
18217 False
/*!isAvx*/, True
/*xIsZ*/ );
18218 goto decode_success
;
18223 /* 66 0F 38 37 = PCMPGTQ
18224 64x2 comparison (signed, presumably; the Intel docs don't say :-)
18226 if (have66noF2noF3(pfx
) && sz
== 2) {
18227 /* FIXME: this needs an alignment check */
18228 delta
= dis_SSEint_E_to_G( vbi
, pfx
, delta
,
18229 "pcmpgtq", Iop_CmpGT64Sx2
, False
);
18230 goto decode_success
;
18236 /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128 8Sx16 (signed) min
18237 66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128 8Sx16 (signed) max
18239 if (have66noF2noF3(pfx
) && sz
== 2) {
18240 /* FIXME: this needs an alignment check */
18241 Bool isMAX
= opc
== 0x3C;
18242 delta
= dis_SSEint_E_to_G(
18244 isMAX
? "pmaxsb" : "pminsb",
18245 isMAX
? Iop_Max8Sx16
: Iop_Min8Sx16
,
18248 goto decode_success
;
18254 /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
18255 Minimum of Packed Signed Double Word Integers (XMM)
18256 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
18257 Maximum of Packed Signed Double Word Integers (XMM)
18259 if (have66noF2noF3(pfx
) && sz
== 2) {
18260 /* FIXME: this needs an alignment check */
18261 Bool isMAX
= opc
== 0x3D;
18262 delta
= dis_SSEint_E_to_G(
18264 isMAX
? "pmaxsd" : "pminsd",
18265 isMAX
? Iop_Max32Sx4
: Iop_Min32Sx4
,
18268 goto decode_success
;
18274 /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
18275 Minimum of Packed Unsigned Word Integers (XMM)
18276 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
18277 Maximum of Packed Unsigned Word Integers (XMM)
18279 if (have66noF2noF3(pfx
) && sz
== 2) {
18280 /* FIXME: this needs an alignment check */
18281 Bool isMAX
= opc
== 0x3E;
18282 delta
= dis_SSEint_E_to_G(
18284 isMAX
? "pmaxuw" : "pminuw",
18285 isMAX
? Iop_Max16Ux8
: Iop_Min16Ux8
,
18288 goto decode_success
;
18294 /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
18295 Minimum of Packed Unsigned Doubleword Integers (XMM)
18296 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
18297 Maximum of Packed Unsigned Doubleword Integers (XMM)
18299 if (have66noF2noF3(pfx
) && sz
== 2) {
18300 /* FIXME: this needs an alignment check */
18301 Bool isMAX
= opc
== 0x3F;
18302 delta
= dis_SSEint_E_to_G(
18304 isMAX
? "pmaxud" : "pminud",
18305 isMAX
? Iop_Max32Ux4
: Iop_Min32Ux4
,
18308 goto decode_success
;
18313 /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128
18314 32x4 integer multiply from xmm2/m128 to xmm1 */
18315 if (have66noF2noF3(pfx
) && sz
== 2) {
18317 modrm
= getUChar(delta
);
18319 IRTemp argL
= newTemp(Ity_V128
);
18320 IRTemp argR
= newTemp(Ity_V128
);
18322 if ( epartIsReg(modrm
) ) {
18323 assign( argL
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
18325 DIP( "pmulld %s,%s\n",
18326 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
18327 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
18329 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
18330 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
18331 assign( argL
, loadLE( Ity_V128
, mkexpr(addr
) ));
18333 DIP( "pmulld %s,%s\n",
18334 dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
18337 assign(argR
, getXMMReg( gregOfRexRM(pfx
, modrm
) ));
18339 putXMMReg( gregOfRexRM(pfx
, modrm
),
18340 binop( Iop_Mul32x4
, mkexpr(argL
), mkexpr(argR
)) );
18342 goto decode_success
;
18347 /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128
18348 Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */
18349 if (have66noF2noF3(pfx
) && sz
== 2) {
18350 delta
= dis_PHMINPOSUW_128( vbi
, pfx
, delta
, False
/*!isAvx*/ );
18351 goto decode_success
;
18360 /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128
18361 DD /r = AESENCLAST xmm1, xmm2/m128
18362 DE /r = AESDEC xmm1, xmm2/m128
18363 DF /r = AESDECLAST xmm1, xmm2/m128
18365 DB /r = AESIMC xmm1, xmm2/m128 */
18366 if (have66noF2noF3(pfx
) && sz
== 2) {
18367 delta
= dis_AESx( vbi
, pfx
, delta
, False
/*!isAvx*/, opc
);
18368 goto decode_success
;
18374 /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok)
18375 F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32
18376 The decoding on this is a bit unusual.
18378 if (haveF2noF3(pfx
)
18379 && (opc
== 0xF1 || (opc
== 0xF0 && !have66(pfx
)))) {
18380 modrm
= getUChar(delta
);
18385 vassert(sz
== 2 || sz
== 4 || sz
== 8);
18387 IRType tyE
= szToITy(sz
);
18388 IRTemp valE
= newTemp(tyE
);
18390 if (epartIsReg(modrm
)) {
18391 assign(valE
, getIRegE(sz
, pfx
, modrm
));
18393 DIP("crc32b %s,%s\n", nameIRegE(sz
, pfx
, modrm
),
18394 nameIRegG(1==getRexW(pfx
) ? 8 : 4, pfx
, modrm
));
18396 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
18397 assign(valE
, loadLE(tyE
, mkexpr(addr
)));
18399 DIP("crc32b %s,%s\n", dis_buf
,
18400 nameIRegG(1==getRexW(pfx
) ? 8 : 4, pfx
, modrm
));
18403 /* Somewhat funny getting/putting of the crc32 value, in order
18404 to ensure that it turns into 64-bit gets and puts. However,
18405 mask off the upper 32 bits so as to not get memcheck false
18406 +ves around the helper call. */
18407 IRTemp valG0
= newTemp(Ity_I64
);
18408 assign(valG0
, binop(Iop_And64
, getIRegG(8, pfx
, modrm
),
18409 mkU64(0xFFFFFFFF)));
18411 const HChar
* nm
= NULL
;
18414 case 1: nm
= "amd64g_calc_crc32b";
18415 fn
= &amd64g_calc_crc32b
; break;
18416 case 2: nm
= "amd64g_calc_crc32w";
18417 fn
= &amd64g_calc_crc32w
; break;
18418 case 4: nm
= "amd64g_calc_crc32l";
18419 fn
= &amd64g_calc_crc32l
; break;
18420 case 8: nm
= "amd64g_calc_crc32q";
18421 fn
= &amd64g_calc_crc32q
; break;
18424 IRTemp valG1
= newTemp(Ity_I64
);
18426 mkIRExprCCall(Ity_I64
, 0/*regparm*/, nm
, fn
,
18427 mkIRExprVec_2(mkexpr(valG0
),
18428 widenUto64(mkexpr(valE
)))));
18430 putIRegG(4, pfx
, modrm
, unop(Iop_64to32
, mkexpr(valG1
)));
18431 goto decode_success
;
18441 *decode_OK
= False
;
18450 /*------------------------------------------------------------*/
18452 /*--- Top-level SSE4: dis_ESC_0F3A__SSE4 ---*/
18454 /*------------------------------------------------------------*/
18456 static Long
dis_PEXTRW ( const VexAbiInfo
* vbi
, Prefix pfx
,
18457 Long delta
, Bool isAvx
)
18459 IRTemp addr
= IRTemp_INVALID
;
18460 IRTemp t0
= IRTemp_INVALID
;
18461 IRTemp t1
= IRTemp_INVALID
;
18462 IRTemp t2
= IRTemp_INVALID
;
18463 IRTemp t3
= IRTemp_INVALID
;
18464 UChar modrm
= getUChar(delta
);
18467 UInt rG
= gregOfRexRM(pfx
,modrm
);
18469 IRTemp xmm_vec
= newTemp(Ity_V128
);
18470 IRTemp d16
= newTemp(Ity_I16
);
18471 const HChar
* mbV
= isAvx
? "v" : "";
18473 vassert(0==getRexW(pfx
)); /* ensured by caller */
18474 assign( xmm_vec
, getXMMReg(rG
) );
18475 breakupV128to32s( xmm_vec
, &t3
, &t2
, &t1
, &t0
);
18477 if ( epartIsReg( modrm
) ) {
18478 imm8_20
= (Int
)(getUChar(delta
+1) & 7);
18480 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
18481 imm8_20
= (Int
)(getUChar(delta
+alen
) & 7);
18485 case 0: assign(d16
, unop(Iop_32to16
, mkexpr(t0
))); break;
18486 case 1: assign(d16
, unop(Iop_32HIto16
, mkexpr(t0
))); break;
18487 case 2: assign(d16
, unop(Iop_32to16
, mkexpr(t1
))); break;
18488 case 3: assign(d16
, unop(Iop_32HIto16
, mkexpr(t1
))); break;
18489 case 4: assign(d16
, unop(Iop_32to16
, mkexpr(t2
))); break;
18490 case 5: assign(d16
, unop(Iop_32HIto16
, mkexpr(t2
))); break;
18491 case 6: assign(d16
, unop(Iop_32to16
, mkexpr(t3
))); break;
18492 case 7: assign(d16
, unop(Iop_32HIto16
, mkexpr(t3
))); break;
18493 default: vassert(0);
18496 if ( epartIsReg( modrm
) ) {
18497 UInt rE
= eregOfRexRM(pfx
,modrm
);
18498 putIReg32( rE
, unop(Iop_16Uto32
, mkexpr(d16
)) );
18500 DIP( "%spextrw $%d, %s,%s\n", mbV
, imm8_20
,
18501 nameXMMReg( rG
), nameIReg32( rE
) );
18503 storeLE( mkexpr(addr
), mkexpr(d16
) );
18505 DIP( "%spextrw $%d, %s,%s\n", mbV
, imm8_20
, nameXMMReg( rG
), dis_buf
);
18511 static Long
dis_PEXTRD ( const VexAbiInfo
* vbi
, Prefix pfx
,
18512 Long delta
, Bool isAvx
)
18514 IRTemp addr
= IRTemp_INVALID
;
18515 IRTemp t0
= IRTemp_INVALID
;
18516 IRTemp t1
= IRTemp_INVALID
;
18517 IRTemp t2
= IRTemp_INVALID
;
18518 IRTemp t3
= IRTemp_INVALID
;
18524 IRTemp xmm_vec
= newTemp(Ity_V128
);
18525 IRTemp src_dword
= newTemp(Ity_I32
);
18526 const HChar
* mbV
= isAvx
? "v" : "";
18528 vassert(0==getRexW(pfx
)); /* ensured by caller */
18529 modrm
= getUChar(delta
);
18530 assign( xmm_vec
, getXMMReg( gregOfRexRM(pfx
,modrm
) ) );
18531 breakupV128to32s( xmm_vec
, &t3
, &t2
, &t1
, &t0
);
18533 if ( epartIsReg( modrm
) ) {
18534 imm8_10
= (Int
)(getUChar(delta
+1) & 3);
18536 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
18537 imm8_10
= (Int
)(getUChar(delta
+alen
) & 3);
18540 switch ( imm8_10
) {
18541 case 0: assign( src_dword
, mkexpr(t0
) ); break;
18542 case 1: assign( src_dword
, mkexpr(t1
) ); break;
18543 case 2: assign( src_dword
, mkexpr(t2
) ); break;
18544 case 3: assign( src_dword
, mkexpr(t3
) ); break;
18545 default: vassert(0);
18548 if ( epartIsReg( modrm
) ) {
18549 putIReg32( eregOfRexRM(pfx
,modrm
), mkexpr(src_dword
) );
18551 DIP( "%spextrd $%d, %s,%s\n", mbV
, imm8_10
,
18552 nameXMMReg( gregOfRexRM(pfx
, modrm
) ),
18553 nameIReg32( eregOfRexRM(pfx
, modrm
) ) );
18555 storeLE( mkexpr(addr
), mkexpr(src_dword
) );
18557 DIP( "%spextrd $%d, %s,%s\n", mbV
,
18558 imm8_10
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ), dis_buf
);
18564 static Long
dis_PEXTRQ ( const VexAbiInfo
* vbi
, Prefix pfx
,
18565 Long delta
, Bool isAvx
)
18567 IRTemp addr
= IRTemp_INVALID
;
18573 IRTemp xmm_vec
= newTemp(Ity_V128
);
18574 IRTemp src_qword
= newTemp(Ity_I64
);
18575 const HChar
* mbV
= isAvx
? "v" : "";
18577 vassert(1==getRexW(pfx
)); /* ensured by caller */
18578 modrm
= getUChar(delta
);
18579 assign( xmm_vec
, getXMMReg( gregOfRexRM(pfx
,modrm
) ) );
18581 if ( epartIsReg( modrm
) ) {
18582 imm8_0
= (Int
)(getUChar(delta
+1) & 1);
18584 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
18585 imm8_0
= (Int
)(getUChar(delta
+alen
) & 1);
18588 switch ( imm8_0
) {
18589 case 0: assign( src_qword
, unop(Iop_V128to64
, mkexpr(xmm_vec
)) );
18591 case 1: assign( src_qword
, unop(Iop_V128HIto64
, mkexpr(xmm_vec
)) );
18593 default: vassert(0);
18596 if ( epartIsReg( modrm
) ) {
18597 putIReg64( eregOfRexRM(pfx
,modrm
), mkexpr(src_qword
) );
18599 DIP( "%spextrq $%d, %s,%s\n", mbV
, imm8_0
,
18600 nameXMMReg( gregOfRexRM(pfx
, modrm
) ),
18601 nameIReg64( eregOfRexRM(pfx
, modrm
) ) );
18603 storeLE( mkexpr(addr
), mkexpr(src_qword
) );
18605 DIP( "%spextrq $%d, %s,%s\n", mbV
,
18606 imm8_0
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ), dis_buf
);
18611 static IRExpr
* math_CTZ32(IRExpr
*exp
)
18613 /* Iop_Ctz32 isn't implemented by the amd64 back end, so use Iop_Ctz64. */
18614 return unop(Iop_64to32
, unop(Iop_Ctz64
, unop(Iop_32Uto64
, exp
)));
/* Special-cased inline translation of PCMPISTRI with imm8 = 0x38 or 0x3A
   ("equal each" comparison, polarity variants), avoiding the generic dirty
   helper.  Writes the result index to RCX and sets OSZACP via the COPY
   thunk.  Deliberately formulated so Memcheck's definedness tracking
   survives partially-defined inputs.  Returns the (unchanged) delta. */
static Long dis_PCMPISTRI_3A ( UChar modrm, UInt regNoL, UInt regNoR,
                               Long delta, UChar opc, UChar imm,
                               HChar dis_buf[] )
{
   /* We only handle PCMPISTRI for now */
   vassert((opc & 0x03) == 0x03);
   /* And only an immediate byte of 0x38 or 0x3A */
   vassert((imm & ~0x02) == 0x38);

   /* FIXME: Is this correct when RegNoL == 16 ? */
   IRTemp argL = newTemp(Ity_V128);
   assign(argL, getXMMReg(regNoL));
   IRTemp argR = newTemp(Ity_V128);
   assign(argR, getXMMReg(regNoR));

   /* zmaskL/zmaskR: 16-bit masks with a 1 wherever the corresponding
      byte of argL/argR is zero (i.e. the string terminator scan). */
   IRTemp zmaskL = newTemp(Ity_I32);
   assign(zmaskL, unop(Iop_16Uto32,
                       unop(Iop_GetMSBs8x16,
                            binop(Iop_CmpEQ8x16, mkexpr(argL), mkV128(0)))));
   IRTemp zmaskR = newTemp(Ity_I32);
   assign(zmaskR, unop(Iop_16Uto32,
                       unop(Iop_GetMSBs8x16,
                            binop(Iop_CmpEQ8x16, mkexpr(argR), mkV128(0)))));

   /* We want validL = ~(zmaskL | -zmaskL)[15:0]

      But this formulation kills memcheck's validity tracking when any
      bits above the first "1" are invalid.  So reformulate as:

      validL = (zmaskL ? (1 << ctz(zmaskL)) : 0) - 1
   */

   IRExpr *ctzL = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskL)));

   /* Generate a bool expression which is zero iff the original is
      zero.  Do this carefully so memcheck can propagate validity bits
      correctly.  (Iop_ExpCmpNE32 is the Memcheck-expensive compare.)
    */
   IRTemp zmaskL_zero = newTemp(Ity_I1);
   assign(zmaskL_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskL), mkU32(0)));

   IRTemp validL = newTemp(Ity_I32);
   assign(validL, binop(Iop_Sub32,
                        IRExpr_ITE(mkexpr(zmaskL_zero),
                                   binop(Iop_Shl32, mkU32(1), ctzL),
                                   mkU32(0)),
                        mkU32(1)));

   /* And similarly for validR. */
   IRExpr *ctzR = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskR)));
   IRTemp zmaskR_zero = newTemp(Ity_I1);
   assign(zmaskR_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskR), mkU32(0)));

   IRTemp validR = newTemp(Ity_I32);
   assign(validR, binop(Iop_Sub32,
                        IRExpr_ITE(mkexpr(zmaskR_zero),
                                   binop(Iop_Shl32, mkU32(1), ctzR),
                                   mkU32(0)),
                        mkU32(1)));

   /* Do the actual comparison: per-byte equality of argL vs argR. */
   IRExpr *boolResII = unop(Iop_16Uto32,
                            unop(Iop_GetMSBs8x16,
                                 binop(Iop_CmpEQ8x16, mkexpr(argL),
                                                      mkexpr(argR))));

   /* Compute boolresII & validL & validR (i.e., if both valid, use
      comparison result) */
   IRExpr *intRes1_a = binop(Iop_And32, boolResII,
                             binop(Iop_And32,
                                   mkexpr(validL), mkexpr(validR)));

   /* Compute ~(validL | validR); i.e., if both invalid, force 1. */
   IRExpr *intRes1_b = unop(Iop_Not32, binop(Iop_Or32,
                                             mkexpr(validL), mkexpr(validR)));
   /* Otherwise, zero. */
   IRExpr *intRes1 = binop(Iop_And32, mkU32(0xFFFF),
                           binop(Iop_Or32, intRes1_a, intRes1_b));

   /* The "0x30" in imm=0x3A means "polarity=3" means XOR validL with
      the result; imm=0x38 (polarity=0) leaves it alone — here both are
      folded into one XOR-with-validL step since validL is all-zeroes
      precisely when it makes no difference.  NOTE(review): confirm the
      imm==0x38 case against upstream; this extraction shows only the
      XOR form. */
   IRTemp intRes2 = newTemp(Ity_I32);
   assign(intRes2, binop(Iop_And32, mkU32(0xFFFF),
                         binop(Iop_Xor32, intRes1, mkexpr(validL))));

   /* If the 0x40 bit were set in imm=0x3A, we would return the index
      of the msb.  Since it is clear, we return the index of the
      lsb.  OR-ing in 0x10000 makes the all-zeroes case yield 16. */
   IRExpr *newECX = math_CTZ32(binop(Iop_Or32,
                                     mkexpr(intRes2), mkU32(0x10000)));

   /* And thats our rcx. */
   putIReg32(R_RCX, newECX);

   /* Now for the condition codes... */

   /* C == 0 iff intRes2 == 0 */
   IRExpr *c_bit = IRExpr_ITE( binop(Iop_ExpCmpNE32, mkexpr(intRes2),
                                     mkU32(0)),
                               mkU32(1 << AMD64G_CC_SHIFT_C),
                               mkU32(0));
   /* Z == 1 iff any in argL is 0 */
   IRExpr *z_bit = IRExpr_ITE( mkexpr(zmaskL_zero),
                               mkU32(1 << AMD64G_CC_SHIFT_Z),
                               mkU32(0));
   /* S == 1 iff any in argR is 0 */
   IRExpr *s_bit = IRExpr_ITE( mkexpr(zmaskR_zero),
                               mkU32(1 << AMD64G_CC_SHIFT_S),
                               mkU32(0));
   /* O == IntRes2[0] */
   IRExpr *o_bit = binop(Iop_Shl32, binop(Iop_And32, mkexpr(intRes2),
                                          mkU32(0x01)),
                         mkU8(AMD64G_CC_SHIFT_O));

   /* Put them all together: install as a COPY thunk so the flags are
      exactly the bits computed above (A and P end up zero). */
   IRTemp cc = newTemp(Ity_I64);
   assign(cc, widenUto64(binop(Iop_Or32,
                               binop(Iop_Or32, c_bit, z_bit),
                               binop(Iop_Or32, s_bit, o_bit))));
   stmt(IRStmt_Put(OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY)));
   stmt(IRStmt_Put(OFFB_CC_DEP1, mkexpr(cc)));
   stmt(IRStmt_Put(OFFB_CC_DEP2, mkU64(0)));
   stmt(IRStmt_Put(OFFB_CC_NDEP, mkU64(0)));

   return delta;
}
/* This can fail, in which case it returns the original (unchanged)
   delta. */
/* Decode the PCMP{I,E}STR{I,M} family (66 0F 3A 60..63).  Computes the
   result via the dirty helper amd64g_dirtyhelper_PCMPxSTRx, which reads
   both operands directly from the guest state (a memory left operand is
   first staged into pseudo-register XMM16).  Falls back (returns delta
   unchanged) for immediate bytes the helper has not been verified for.

   NOTE(review): this body was reconstructed from a line-mangled
   extraction; several single-line statements (declarations, delta
   advances, d->nFxState, the !isxSTRM guard, some switch case labels)
   were absent from the visible text — verify against upstream. */
static Long dis_PCMPxSTRx ( const VexAbiInfo* vbi, Prefix pfx,
                            Long delta, Bool isAvx, UChar opc )
{
   Long   delta0  = delta;            /* returned unchanged on failure */
   UInt   isISTRx = opc & 2;          /* I (implicit length) vs E form */
   UInt   isxSTRM = (opc & 1) ^ 1;    /* M (mask result) vs I (index) */
   UInt   regNoL  = 0;
   UInt   regNoR  = 0;
   UChar  imm     = 0;
   IRTemp addr    = IRTemp_INVALID;
   Int    alen    = 0;
   HChar  dis_buf[50];

   /* This is a nasty kludge.  We need to pass 2 x V128 to the helper
      (which is clean).  Since we can't do that, use a dirty helper to
      compute the results directly from the XMM regs in the guest
      state.  That means for the memory case, we need to move the left
      operand into a pseudo-register (XMM16, let's call it). */
   UChar modrm = getUChar(delta);
   if (epartIsReg(modrm)) {
      regNoL = eregOfRexRM(pfx, modrm);
      regNoR = gregOfRexRM(pfx, modrm);
      imm = getUChar(delta+1);
      delta += 1+1;
   } else {
      regNoL = 16; /* use XMM16 as an intermediary */
      regNoR = gregOfRexRM(pfx, modrm);
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      /* No alignment check; I guess that makes sense, given that
         these insns are for dealing with C style strings. */
      stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
      imm = getUChar(delta+alen);
      delta += alen+1;
   }

   /* Print the insn here, since dis_PCMPISTRI_3A doesn't do so
      itself. */
   if (regNoL == 16) {
      DIP("%spcmp%cstr%c $%x,%s,%s\n",
          isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
          (UInt)imm, dis_buf, nameXMMReg(regNoR));
   } else {
      DIP("%spcmp%cstr%c $%x,%s,%s\n",
          isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
          (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
   }

   /* Handle special case(s): PCMPISTRI $0x38/$0x3A gets the fast
      inline translation. */
   if (imm == 0x3A && isISTRx && !isxSTRM) {
      return dis_PCMPISTRI_3A ( modrm, regNoL, regNoR, delta,
                                opc, imm, dis_buf);
   }

   /* Now we know the XMM reg numbers for the operands, and the
      immediate byte.  Is it one we can actually handle? Throw out any
      cases for which the helper function has not been verified.
      NOTE(review): the extraction may have dropped some case labels
      from this list — verify against upstream. */
   switch (imm) {
      // the 8-bit character versions
      case 0x00: case 0x02:
      case 0x08: case 0x0A: case 0x0C: case 0x0E:
      case 0x10: case 0x12: case 0x14:
      case 0x18: case 0x1A:
      case 0x30: case 0x34:
      case 0x38: case 0x3A:
      case 0x40: case 0x42: case 0x44: case 0x46:
      case 0x70: case 0x72:
         break;
      // the 16-bit character versions of the above
      case 0x01: case 0x03:
      case 0x09: case 0x0B: case 0x0D:
      case 0x19: case 0x1B:
      case 0x39: case 0x3B:
      case 0x41: case 0x45:
         break;
      default:
         return delta0; /*FAIL*/
   }

   /* Who ya gonna call?  Presumably not Ghostbusters. */
   void*        fn = &amd64g_dirtyhelper_PCMPxSTRx;
   const HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";

   /* Round up the arguments.  Note that this is a kludge -- the use
      of mkU64 rather than mkIRExpr_HWord implies the assumption that
      the host's word size is 64-bit. */
   UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
   UInt gstOffR = ymmGuestRegOffset(regNoR);

   IRExpr*  opc4_and_imm = mkU64((opc << 8) | (imm & 0xFF));
   IRExpr*  gstOffLe     = mkU64(gstOffL);
   IRExpr*  gstOffRe     = mkU64(gstOffR);
   /* The E forms additionally consume explicit lengths in RDX/RAX. */
   IRExpr*  edxIN        = isISTRx ? mkU64(0) : getIRegRDX(8);
   IRExpr*  eaxIN        = isISTRx ? mkU64(0) : getIRegRAX(8);
   IRExpr** args
      = mkIRExprVec_6( IRExpr_GSPTR(),
                       opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );

   IRTemp   resT = newTemp(Ity_I64);
   IRDirty* d    = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
   /* It's not really a dirty call, but we can't use the clean helper
      mechanism here for the very lame reason that we can't pass 2 x
      V128s by value to a helper.  Hence this roundabout scheme. */
   d->nFxState = 3;
   vex_bzero(&d->fxState, sizeof(d->fxState));
   d->fxState[0].fx     = Ifx_Read;
   d->fxState[0].offset = gstOffL;
   d->fxState[0].size   = sizeof(U128);
   d->fxState[1].fx     = Ifx_Read;
   d->fxState[1].offset = gstOffR;
   d->fxState[1].size   = sizeof(U128);
   /* Declare that the helper writes XMM0. */
   d->fxState[2].fx     = Ifx_Write;
   d->fxState[2].offset = ymmGuestRegOffset(0);
   d->fxState[2].size   = sizeof(U128);

   stmt( IRStmt_Dirty(d) );

   /* Now resT[15:0] holds the new OSZACP values, so the condition
      codes must be updated. And for a xSTRI case, resT[31:16] holds
      the new ECX value, so stash that too. */
   if (!isxSTRM) {
      putIReg64(R_RCX, binop(Iop_And64,
                             binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
                             mkU64(0xFFFF)));
   }

   /* Zap the upper half of the dest reg as per AVX conventions. */
   if (isxSTRM && isAvx)
      putYMMRegLane128(/*YMM*/0, 1, mkV128(0));

   /* Flags go in via the COPY thunk. */
   stmt( IRStmt_Put(
            OFFB_CC_DEP1,
            binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF))
   ));
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));

   return delta;
}
/* PINSRB core: return v128 with byte lane imm8 (0..15) replaced by u8.
   Builds a vector that is zero except for u8 in the target lane, then
   ORs it with v128 masked to clear that lane. */
static IRTemp math_PINSRB_128 ( IRTemp v128, IRTemp u8, UInt imm8 )
{
   vassert(imm8 >= 0 && imm8 <= 15);

   // Create a V128 value which has the selected byte in the
   // specified lane, and zeroes everywhere else.
   IRTemp tmp128    = newTemp(Ity_V128);
   IRTemp halfshift = newTemp(Ity_I64);
   /* Position u8 within its 64-bit half (lane imm8 & 7). */
   assign(halfshift, binop(Iop_Shl64,
                           unop(Iop_8Uto64, mkexpr(u8)),
                           mkU8(8 * (imm8 & 7))));
   /* Place that half into the low (imm8 < 8) or high half of a V128. */
   if (imm8 < 8) {
      assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
   } else {
      assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
   }

   /* mkV128 takes a 16-bit mask, one bit per byte lane; clear only the
      target lane of v128 and OR the new byte in. */
   UShort mask = ~(1 << imm8);
   IRTemp res  = newTemp(Ity_V128);
   assign( res, binop(Iop_OrV128,
                      mkexpr(tmp128),
                      binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
   return res;
}
/* PINSRD core: return v128 with 32-bit lane imm8 (0..3) replaced by u32.
   Same mask-and-OR strategy as math_PINSRB_128, per 32-bit lane. */
static IRTemp math_PINSRD_128 ( IRTemp v128, IRTemp u32, UInt imm8 )
{
   IRTemp z32 = newTemp(Ity_I32);
   assign(z32, mkU32(0));

   /* Surround u32 with zeroes as per imm, giving us something we can
      OR into a suitably masked-out v128.*/
   IRTemp withZs = newTemp(Ity_V128);
   UShort mask = 0;   /* byte-lane mask for mkV128: 0 bits clear the
                         lane being replaced */
   switch (imm8) {
      case 3:  mask = 0x0FFF;
               assign(withZs, mkV128from32s(u32, z32, z32, z32));
               break;
      case 2:  mask = 0xF0FF;
               assign(withZs, mkV128from32s(z32, u32, z32, z32));
               break;
      case 1:  mask = 0xFF0F;
               assign(withZs, mkV128from32s(z32, z32, u32, z32));
               break;
      case 0:  mask = 0xFFF0;
               assign(withZs, mkV128from32s(z32, z32, z32, u32));
               break;
      default: vassert(0);
   }

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_OrV128,
                      mkexpr(withZs),
                      binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
   return res;
}
/* PINSRQ core: return v128 with 64-bit lane imm8 (0 or 1) replaced by
   u64.  Same mask-and-OR strategy as the other PINSRx helpers. */
static IRTemp math_PINSRQ_128 ( IRTemp v128, IRTemp u64, UInt imm8 )
{
   /* Surround u64 with zeroes as per imm, giving us something we can
      OR into a suitably masked-out v128.*/
   IRTemp withZs = newTemp(Ity_V128);
   UShort mask = 0;   /* byte-lane mask: keep the other 64-bit half */
   if (imm8 == 0) {
      mask = 0xFF00;
      assign(withZs, binop(Iop_64HLtoV128, mkU64(0), mkexpr(u64)));
   } else {
      vassert(imm8 == 1);
      mask = 0x00FF;
      assign( withZs, binop(Iop_64HLtoV128, mkexpr(u64), mkU64(0)));
   }

   IRTemp res = newTemp(Ity_V128);
   assign( res, binop( Iop_OrV128,
                       mkexpr(withZs),
                       binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
   return res;
}
/* INSERTPS core: insert 32-bit value toInsertD into the lane of dstV
   selected by imm8[5:4] ("count_d"), then zero any lanes whose bit is
   set in imm8[3:0] ("zmask").  imm8[7:6] (source lane select) has
   already been applied by the caller when producing toInsertD. */
static IRTemp math_INSERTPS ( IRTemp dstV, IRTemp toInsertD, UInt imm8 )
{
   const IRTemp inval = IRTemp_INVALID;
   IRTemp dstDs[4] = { inval, inval, inval, inval };
   breakupV128to32s( dstV, &dstDs[3], &dstDs[2], &dstDs[1], &dstDs[0] );

   vassert(imm8 <= 255);
   dstDs[(imm8 >> 4) & 3] = toInsertD; /* "imm8_count_d" */

   /* Zero out lanes as directed by the low 4 bits of imm8. */
   UInt imm8_zmask = (imm8 & 15);
   IRTemp zero_32 = newTemp(Ity_I32);
   assign( zero_32, mkU32(0) );
   IRTemp resV = newTemp(Ity_V128);
   assign( resV, mkV128from32s(
                    ((imm8_zmask & 8) == 8) ? zero_32 : dstDs[3],
                    ((imm8_zmask & 4) == 4) ? zero_32 : dstDs[2],
                    ((imm8_zmask & 2) == 2) ? zero_32 : dstDs[1],
                    ((imm8_zmask & 1) == 1) ? zero_32 : dstDs[0]) );
   return resV;
}
/* Decode PEXTRB (66 0F 3A 14 /r ib): extract byte lane imm8[3:0] of an
   XMM register; zero-extend into a GPR (reg form) or store one byte to
   memory (mem form).  The byte is located by picking the containing
   32-bit lane (imm8 bits 3:2) and shifting it down by 8*(imm8 & 3).
   Returns the updated delta.  'isAvx' only affects the DIP prefix. */
static Long dis_PEXTRB_128_GtoE ( const VexAbiInfo* vbi, Prefix pfx,
                                  Long delta, Bool isAvx )
{
   IRTemp addr     = IRTemp_INVALID;
   Int    alen     = 0;
   HChar  dis_buf[50];
   Int    imm8;
   IRTemp xmm_vec  = newTemp(Ity_V128);
   IRTemp sel_lane = newTemp(Ity_I32);   /* selected 32-bit lane */
   IRTemp shr_lane = newTemp(Ity_I32);   /* lane with target byte at bit 0 */
   const HChar* mbV = isAvx ? "v" : "";
   UChar  modrm    = getUChar(delta);
   IRTemp t3, t2, t1, t0;

   assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
   t3 = t2 = t1 = t0 = IRTemp_INVALID;
   breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );

   if ( epartIsReg( modrm ) ) {
      imm8 = (Int)getUChar(delta+1);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8 = (Int)getUChar(delta+alen);
   }
   /* Pick the 32-bit lane that contains byte imm8. */
   switch ( (imm8 >> 2) & 3 ) {
      case 0:  assign( sel_lane, mkexpr(t0) ); break;
      case 1:  assign( sel_lane, mkexpr(t1) ); break;
      case 2:  assign( sel_lane, mkexpr(t2) ); break;
      case 3:  assign( sel_lane, mkexpr(t3) ); break;
      default: vassert(0);
   }
   /* Shift the wanted byte down to bits 7:0. */
   assign( shr_lane,
           binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) );

   if ( epartIsReg( modrm ) ) {
      /* Zero-extend the byte to 64 bits into the destination GPR. */
      putIReg64( eregOfRexRM(pfx,modrm),
                 unop( Iop_32Uto64,
                       binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) );
      /* NOTE(review): delta advance reconstructed (modrm + imm8). */
      delta += 2;
      DIP( "%spextrb $%d, %s,%s\n", mbV, imm8,
           nameXMMReg( gregOfRexRM(pfx, modrm) ),
           nameIReg64( eregOfRexRM(pfx, modrm) ) );
   } else {
      storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) );
      /* NOTE(review): delta advance reconstructed (amode + imm8). */
      delta += alen+1;
      DIP( "%spextrb $%d,%s,%s\n", mbV,
           imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
   }
   return delta;
}
/* DPPD core: dot-product of packed doubles.  Multiply lanes of dst_vec
   and src_vec, keep only the products selected by imm8[5:4], sum them,
   and broadcast the sum to the result lanes selected by imm8[1:0],
   zeroing the rest. */
static IRTemp math_DPPD_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
{
   vassert(imm8 < 256);
   /* Byte-lane masks selecting neither/low/high/both 64-bit lanes. */
   UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
   IRTemp and_vec = newTemp(Ity_V128);
   IRTemp sum_vec = newTemp(Ity_V128);
   IRTemp rm      = newTemp(Ity_I32);
   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   /* Products, with unselected lanes masked to zero. */
   assign( and_vec, binop( Iop_AndV128,
                           triop( Iop_Mul64Fx2,
                                  mkexpr(rm),
                                  mkexpr(dst_vec), mkexpr(src_vec) ),
                           mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) );

   /* Sum the two lanes (hi+lo) in the low lane. */
   assign( sum_vec, binop( Iop_Add64F0x2,
                           binop( Iop_InterleaveHI64x2,
                                  mkexpr(and_vec), mkexpr(and_vec) ),
                           binop( Iop_InterleaveLO64x2,
                                  mkexpr(and_vec), mkexpr(and_vec) ) ) );
   /* Duplicate the sum into both lanes, then apply the output mask. */
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop( Iop_AndV128,
                      binop( Iop_InterleaveLO64x2,
                             mkexpr(sum_vec), mkexpr(sum_vec) ),
                      mkV128( imm8_perms[ (imm8 & 3) ] ) ) );
   return res;
}
/* DPPS core: dot-product of packed singles.  Multiply lanes selected by
   imm8[7:4], sum all four products via two interleave+add steps, and
   broadcast the sum to the result lanes selected by imm8[3:0]. */
static IRTemp math_DPPS_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
{
   vassert(imm8 < 256);
   IRTemp tmp_prod_vec = newTemp(Ity_V128);
   IRTemp prod_vec     = newTemp(Ity_V128);
   IRTemp sum_vec      = newTemp(Ity_V128);
   IRTemp rm           = newTemp(Ity_I32);
   IRTemp v3, v2, v1, v0;
   v3 = v2 = v1 = v0   = IRTemp_INVALID;
   /* Byte-lane masks: one bit-nibble per selected 32-bit lane. */
   UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
                             0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
                             0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
                             0xFFFF };

   assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
   /* Products, with unselected lanes masked to zero. */
   assign( tmp_prod_vec,
           binop( Iop_AndV128,
                  triop( Iop_Mul32Fx4,
                         mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ),
                  mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) );
   /* Swap the middle two lanes (v1<->v2) so the interleave/add pairs
      below combine the right elements. */
   breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 );
   assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) );

   /* First reduction step: pairwise sums. */
   assign( sum_vec, triop( Iop_Add32Fx4,
                           mkexpr(rm),
                           binop( Iop_InterleaveHI32x4,
                                  mkexpr(prod_vec), mkexpr(prod_vec) ),
                           binop( Iop_InterleaveLO32x4,
                                  mkexpr(prod_vec), mkexpr(prod_vec) ) ) );

   /* Second reduction step, then apply the output-lane mask. */
   IRTemp res = newTemp(Ity_V128);
   assign( res, binop( Iop_AndV128,
                       triop( Iop_Add32Fx4,
                              mkexpr(rm),
                              binop( Iop_InterleaveHI32x4,
                                     mkexpr(sum_vec), mkexpr(sum_vec) ),
                              binop( Iop_InterleaveLO32x4,
                                     mkexpr(sum_vec), mkexpr(sum_vec) ) ),
                       mkV128( imm8_perms[ (imm8 & 15) ] ) ) );
   return res;
}
/* MPSADBW core: multiple packed sums of absolute differences.  Masks the
   operands down to the bytes actually used (per imm8), splits both into
   64-bit halves, and computes each result half with the clean helper
   amd64g_calc_mpsadbw (bit 7 of its last arg selects hi/lo half). */
static IRTemp math_MPSADBW_128 ( IRTemp dst_vec, IRTemp src_vec, UInt imm8 )
{
   /* Mask out bits of the operands we don't need.  This isn't
      strictly necessary, but it does ensure Memcheck doesn't
      give us any false uninitialised value errors as a
      result. */
   UShort src_mask[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 };
   UShort dst_mask[2] = { 0x07FF, 0x7FF0 };

   IRTemp src_maskV = newTemp(Ity_V128);
   IRTemp dst_maskV = newTemp(Ity_V128);
   assign(src_maskV, mkV128( src_mask[ imm8 & 3 ] ));
   assign(dst_maskV, mkV128( dst_mask[ (imm8 >> 2) & 1 ] ));

   IRTemp src_masked = newTemp(Ity_V128);
   IRTemp dst_masked = newTemp(Ity_V128);
   assign(src_masked, binop(Iop_AndV128, mkexpr(src_vec), mkexpr(src_maskV)));
   assign(dst_masked, binop(Iop_AndV128, mkexpr(dst_vec), mkexpr(dst_maskV)));

   /* Generate 4 64 bit values that we can hand to a clean helper */
   IRTemp sHi = newTemp(Ity_I64);
   IRTemp sLo = newTemp(Ity_I64);
   assign( sHi, unop(Iop_V128HIto64, mkexpr(src_masked)) );
   assign( sLo, unop(Iop_V128to64,   mkexpr(src_masked)) );

   IRTemp dHi = newTemp(Ity_I64);
   IRTemp dLo = newTemp(Ity_I64);
   assign( dHi, unop(Iop_V128HIto64, mkexpr(dst_masked)) );
   assign( dLo, unop(Iop_V128to64,   mkexpr(dst_masked)) );

   /* Compute halves of the result separately */
   IRTemp resHi = newTemp(Ity_I64);
   IRTemp resLo = newTemp(Ity_I64);

   /* 0x80 flags the high-half computation to the helper. */
   IRExpr** argsHi
      = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
                       mkU64( 0x80 | (imm8 & 7) ));
   IRExpr** argsLo
      = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
                       mkU64( 0x00 | (imm8 & 7) ));

   assign(resHi, mkIRExprCCall( Ity_I64, 0/*regparm*/,
                                "amd64g_calc_mpsadbw",
                                &amd64g_calc_mpsadbw, argsHi ));
   assign(resLo, mkIRExprCCall( Ity_I64, 0/*regparm*/,
                                "amd64g_calc_mpsadbw",
                                &amd64g_calc_mpsadbw, argsLo ));

   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo)));
   return res;
}
/* Decode EXTRACTPS (66 0F 3A 17 /r ib): extract the 32-bit lane of an
   XMM register selected by imm8[1:0] into a 32-bit GPR or memory.
   Returns the updated delta.  'isAvx' only affects the DIP prefix. */
static Long dis_EXTRACTPS ( const VexAbiInfo* vbi, Prefix pfx,
                            Long delta, Bool isAvx )
{
   IRTemp addr       = IRTemp_INVALID;
   Int    alen       = 0;
   HChar  dis_buf[50];
   UChar  modrm      = getUChar(delta);
   Int    imm8_10;                      /* imm8 bits 1:0 = lane select */
   IRTemp xmm_vec    = newTemp(Ity_V128);
   IRTemp src_dword  = newTemp(Ity_I32);
   UInt   rG         = gregOfRexRM(pfx,modrm);
   IRTemp t3, t2, t1, t0;
   t3 = t2 = t1 = t0 = IRTemp_INVALID;

   assign( xmm_vec, getXMMReg( rG ) );
   breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );

   if ( epartIsReg( modrm ) ) {
      imm8_10 = (Int)(getUChar(delta+1) & 3);
   } else {
      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
      imm8_10 = (Int)(getUChar(delta+alen) & 3);
   }

   switch ( imm8_10 ) {
      case 0:  assign( src_dword, mkexpr(t0) ); break;
      case 1:  assign( src_dword, mkexpr(t1) ); break;
      case 2:  assign( src_dword, mkexpr(t2) ); break;
      case 3:  assign( src_dword, mkexpr(t3) ); break;
      default: vassert(0);
   }

   if ( epartIsReg( modrm ) ) {
      UInt rE = eregOfRexRM(pfx,modrm);
      putIReg32( rE, mkexpr(src_dword) );
      /* NOTE(review): delta advance reconstructed (modrm + imm8). */
      delta += 2;
      DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
           nameXMMReg( rG ), nameIReg32( rE ) );
   } else {
      storeLE( mkexpr(addr), mkexpr(src_dword) );
      /* NOTE(review): delta advance reconstructed (amode + imm8). */
      delta += alen+1;
      DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
           nameXMMReg( rG ), dis_buf );
   }
   return delta;
}
/* PCLMULQDQ core: carry-less (GF(2)) multiply of one 64-bit half of dV
   (chosen by imm8 bit 0) with one 64-bit half of sV (chosen by imm8
   bit 4).  The 128-bit product is computed in two 64-bit pieces by the
   clean helper amd64g_calculate_pclmul (last arg 0 = low half, 1 = high
   half). */
static IRTemp math_PCLMULQDQ( IRTemp dV, IRTemp sV, UInt imm8 )
{
   IRTemp t0 = newTemp(Ity_I64);
   IRTemp t1 = newTemp(Ity_I64);
   assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64,
              mkexpr(dV)));
   assign(t1, unop((imm8&16) ? Iop_V128HIto64 : Iop_V128to64,
              mkexpr(sV)));

   IRTemp t2 = newTemp(Ity_I64);
   IRTemp t3 = newTemp(Ity_I64);

   IRExpr** args;

   args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0));
   assign(t2, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
                            &amd64g_calculate_pclmul, args));
   args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1));
   assign(t3, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
                            &amd64g_calculate_pclmul, args));

   /* t3 is the high 64 bits of the product, t2 the low. */
   IRTemp res = newTemp(Ity_V128);
   assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)));
   return res;
}
19249 __attribute__((noinline
))
19251 Long
dis_ESC_0F3A__SSE4 ( Bool
* decode_OK
,
19252 const VexAbiInfo
* vbi
,
19253 Prefix pfx
, Int sz
, Long deltaIN
)
19255 IRTemp addr
= IRTemp_INVALID
;
19260 *decode_OK
= False
;
19262 Long delta
= deltaIN
;
19263 UChar opc
= getUChar(delta
);
19268 /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */
19269 if (have66noF2noF3(pfx
) && sz
== 2) {
19271 IRTemp src0
= newTemp(Ity_F32
);
19272 IRTemp src1
= newTemp(Ity_F32
);
19273 IRTemp src2
= newTemp(Ity_F32
);
19274 IRTemp src3
= newTemp(Ity_F32
);
19275 IRTemp res0
= newTemp(Ity_F32
);
19276 IRTemp res1
= newTemp(Ity_F32
);
19277 IRTemp res2
= newTemp(Ity_F32
);
19278 IRTemp res3
= newTemp(Ity_F32
);
19279 IRTemp rm
= newTemp(Ity_I32
);
19282 modrm
= getUChar(delta
);
19284 if (epartIsReg(modrm
)) {
19286 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 0 ) );
19288 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 1 ) );
19290 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 2 ) );
19292 getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 3 ) );
19293 imm
= getUChar(delta
+1);
19294 if (imm
& ~15) goto decode_failure
;
19296 DIP( "roundps $%d,%s,%s\n",
19297 imm
, nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19298 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19300 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19301 gen_SIGNAL_if_not_16_aligned(vbi
, addr
);
19302 assign( src0
, loadLE(Ity_F32
,
19303 binop(Iop_Add64
, mkexpr(addr
), mkU64(0) )));
19304 assign( src1
, loadLE(Ity_F32
,
19305 binop(Iop_Add64
, mkexpr(addr
), mkU64(4) )));
19306 assign( src2
, loadLE(Ity_F32
,
19307 binop(Iop_Add64
, mkexpr(addr
), mkU64(8) )));
19308 assign( src3
, loadLE(Ity_F32
,
19309 binop(Iop_Add64
, mkexpr(addr
), mkU64(12) )));
19310 imm
= getUChar(delta
+alen
);
19311 if (imm
& ~15) goto decode_failure
;
19313 DIP( "roundps $%d,%s,%s\n",
19314 imm
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19317 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19318 that encoding is the same as the encoding for IRRoundingMode,
19319 we can use that value directly in the IR as a rounding
19321 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
19323 assign(res0
, binop(Iop_RoundF32toInt
, mkexpr(rm
), mkexpr(src0
)) );
19324 assign(res1
, binop(Iop_RoundF32toInt
, mkexpr(rm
), mkexpr(src1
)) );
19325 assign(res2
, binop(Iop_RoundF32toInt
, mkexpr(rm
), mkexpr(src2
)) );
19326 assign(res3
, binop(Iop_RoundF32toInt
, mkexpr(rm
), mkexpr(src3
)) );
19328 putXMMRegLane32F( gregOfRexRM(pfx
, modrm
), 0, mkexpr(res0
) );
19329 putXMMRegLane32F( gregOfRexRM(pfx
, modrm
), 1, mkexpr(res1
) );
19330 putXMMRegLane32F( gregOfRexRM(pfx
, modrm
), 2, mkexpr(res2
) );
19331 putXMMRegLane32F( gregOfRexRM(pfx
, modrm
), 3, mkexpr(res3
) );
19333 goto decode_success
;
19338 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */
19339 if (have66noF2noF3(pfx
) && sz
== 2) {
19341 IRTemp src0
= newTemp(Ity_F64
);
19342 IRTemp src1
= newTemp(Ity_F64
);
19343 IRTemp res0
= newTemp(Ity_F64
);
19344 IRTemp res1
= newTemp(Ity_F64
);
19345 IRTemp rm
= newTemp(Ity_I32
);
19348 modrm
= getUChar(delta
);
19350 if (epartIsReg(modrm
)) {
19352 getXMMRegLane64F( eregOfRexRM(pfx
, modrm
), 0 ) );
19354 getXMMRegLane64F( eregOfRexRM(pfx
, modrm
), 1 ) );
19355 imm
= getUChar(delta
+1);
19356 if (imm
& ~15) goto decode_failure
;
19358 DIP( "roundpd $%d,%s,%s\n",
19359 imm
, nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19360 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19362 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19363 gen_SIGNAL_if_not_16_aligned(vbi
, addr
);
19364 assign( src0
, loadLE(Ity_F64
,
19365 binop(Iop_Add64
, mkexpr(addr
), mkU64(0) )));
19366 assign( src1
, loadLE(Ity_F64
,
19367 binop(Iop_Add64
, mkexpr(addr
), mkU64(8) )));
19368 imm
= getUChar(delta
+alen
);
19369 if (imm
& ~15) goto decode_failure
;
19371 DIP( "roundpd $%d,%s,%s\n",
19372 imm
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19375 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19376 that encoding is the same as the encoding for IRRoundingMode,
19377 we can use that value directly in the IR as a rounding
19379 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
19381 assign(res0
, binop(Iop_RoundF64toInt
, mkexpr(rm
), mkexpr(src0
)) );
19382 assign(res1
, binop(Iop_RoundF64toInt
, mkexpr(rm
), mkexpr(src1
)) );
19384 putXMMRegLane64F( gregOfRexRM(pfx
, modrm
), 0, mkexpr(res0
) );
19385 putXMMRegLane64F( gregOfRexRM(pfx
, modrm
), 1, mkexpr(res1
) );
19387 goto decode_success
;
19393 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
19394 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
19396 if (have66noF2noF3(pfx
) && sz
== 2) {
19398 Bool isD
= opc
== 0x0B;
19399 IRTemp src
= newTemp(isD
? Ity_F64
: Ity_F32
);
19400 IRTemp res
= newTemp(isD
? Ity_F64
: Ity_F32
);
19403 modrm
= getUChar(delta
);
19405 if (epartIsReg(modrm
)) {
19407 isD
? getXMMRegLane64F( eregOfRexRM(pfx
, modrm
), 0 )
19408 : getXMMRegLane32F( eregOfRexRM(pfx
, modrm
), 0 ) );
19409 imm
= getUChar(delta
+1);
19410 if (imm
& ~15) goto decode_failure
;
19412 DIP( "rounds%c $%d,%s,%s\n",
19414 imm
, nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19415 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19417 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19418 assign( src
, loadLE( isD
? Ity_F64
: Ity_F32
, mkexpr(addr
) ));
19419 imm
= getUChar(delta
+alen
);
19420 if (imm
& ~15) goto decode_failure
;
19422 DIP( "rounds%c $%d,%s,%s\n",
19424 imm
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19427 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19428 that encoding is the same as the encoding for IRRoundingMode,
19429 we can use that value directly in the IR as a rounding
19431 assign(res
, binop(isD
? Iop_RoundF64toInt
: Iop_RoundF32toInt
,
19432 (imm
& 4) ? get_sse_roundingmode()
19437 putXMMRegLane64F( gregOfRexRM(pfx
, modrm
), 0, mkexpr(res
) );
19439 putXMMRegLane32F( gregOfRexRM(pfx
, modrm
), 0, mkexpr(res
) );
19441 goto decode_success
;
19446 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
19447 Blend Packed Single Precision Floating-Point Values (XMM) */
19448 if (have66noF2noF3(pfx
) && sz
== 2) {
19451 IRTemp dst_vec
= newTemp(Ity_V128
);
19452 IRTemp src_vec
= newTemp(Ity_V128
);
19454 modrm
= getUChar(delta
);
19456 assign( dst_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19458 if ( epartIsReg( modrm
) ) {
19459 imm8
= (Int
)getUChar(delta
+1);
19460 assign( src_vec
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
19462 DIP( "blendps $%d, %s,%s\n", imm8
,
19463 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19464 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19466 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19467 1/* imm8 is 1 byte after the amode */ );
19468 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
19469 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19470 imm8
= (Int
)getUChar(delta
+alen
);
19472 DIP( "blendps $%d, %s,%s\n",
19473 imm8
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19476 putXMMReg( gregOfRexRM(pfx
, modrm
),
19477 mkexpr( math_BLENDPS_128( src_vec
, dst_vec
, imm8
) ) );
19478 goto decode_success
;
19483 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
19484 Blend Packed Double Precision Floating-Point Values (XMM) */
19485 if (have66noF2noF3(pfx
) && sz
== 2) {
19488 IRTemp dst_vec
= newTemp(Ity_V128
);
19489 IRTemp src_vec
= newTemp(Ity_V128
);
19491 modrm
= getUChar(delta
);
19492 assign( dst_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19494 if ( epartIsReg( modrm
) ) {
19495 imm8
= (Int
)getUChar(delta
+1);
19496 assign( src_vec
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
19498 DIP( "blendpd $%d, %s,%s\n", imm8
,
19499 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19500 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19502 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19503 1/* imm8 is 1 byte after the amode */ );
19504 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
19505 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19506 imm8
= (Int
)getUChar(delta
+alen
);
19508 DIP( "blendpd $%d, %s,%s\n",
19509 imm8
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19512 putXMMReg( gregOfRexRM(pfx
, modrm
),
19513 mkexpr( math_BLENDPD_128( src_vec
, dst_vec
, imm8
) ) );
19514 goto decode_success
;
19519 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8
19520 Blend Packed Words (XMM) */
19521 if (have66noF2noF3(pfx
) && sz
== 2) {
19524 IRTemp dst_vec
= newTemp(Ity_V128
);
19525 IRTemp src_vec
= newTemp(Ity_V128
);
19527 modrm
= getUChar(delta
);
19529 assign( dst_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19531 if ( epartIsReg( modrm
) ) {
19532 imm8
= (Int
)getUChar(delta
+1);
19533 assign( src_vec
, getXMMReg( eregOfRexRM(pfx
, modrm
) ) );
19535 DIP( "pblendw $%d, %s,%s\n", imm8
,
19536 nameXMMReg( eregOfRexRM(pfx
, modrm
) ),
19537 nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19539 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19540 1/* imm8 is 1 byte after the amode */ );
19541 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
19542 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19543 imm8
= (Int
)getUChar(delta
+alen
);
19545 DIP( "pblendw $%d, %s,%s\n",
19546 imm8
, dis_buf
, nameXMMReg( gregOfRexRM(pfx
, modrm
) ) );
19549 putXMMReg( gregOfRexRM(pfx
, modrm
),
19550 mkexpr( math_PBLENDW_128( src_vec
, dst_vec
, imm8
) ) );
19551 goto decode_success
;
19556 /* 66 0F 3A 14 /r ib = PEXTRB r/m16, xmm, imm8
19557 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg.
19559 if (have66noF2noF3(pfx
) && sz
== 2) {
19560 delta
= dis_PEXTRB_128_GtoE( vbi
, pfx
, delta
, False
/*!isAvx*/ );
19561 goto decode_success
;
19566 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
19567 Extract Word from xmm, store in mem or zero-extend + store in gen.reg.
19569 if (have66noF2noF3(pfx
) && sz
== 2) {
19570 delta
= dis_PEXTRW( vbi
, pfx
, delta
, False
/*!isAvx*/ );
19571 goto decode_success
;
19576 /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
19577 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
19578 Note that this insn has the same opcodes as PEXTRQ, but
19579 here the REX.W bit is _not_ present */
19580 if (have66noF2noF3(pfx
)
19581 && sz
== 2 /* REX.W is _not_ present */) {
19582 delta
= dis_PEXTRD( vbi
, pfx
, delta
, False
/*!isAvx*/ );
19583 goto decode_success
;
19585 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
19586 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
19587 Note that this insn has the same opcodes as PEXTRD, but
19588 here the REX.W bit is present */
19589 if (have66noF2noF3(pfx
)
19590 && sz
== 8 /* REX.W is present */) {
19591 delta
= dis_PEXTRQ( vbi
, pfx
, delta
, False
/*!isAvx*/);
19592 goto decode_success
;
19597 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract
19598 float from xmm reg and store in gen.reg or mem. This is
19599 identical to PEXTRD, except that REX.W appears to be ignored.
19601 if (have66noF2noF3(pfx
)
19602 && (sz
== 2 || /* ignore redundant REX.W */ sz
== 8)) {
19603 delta
= dis_EXTRACTPS( vbi
, pfx
, delta
, False
/*!isAvx*/ );
19604 goto decode_success
;
19609 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8
19610 Extract byte from r32/m8 and insert into xmm1 */
19611 if (have66noF2noF3(pfx
) && sz
== 2) {
19613 IRTemp new8
= newTemp(Ity_I8
);
19614 modrm
= getUChar(delta
);
19615 UInt rG
= gregOfRexRM(pfx
, modrm
);
19616 if ( epartIsReg( modrm
) ) {
19617 UInt rE
= eregOfRexRM(pfx
,modrm
);
19618 imm8
= (Int
)(getUChar(delta
+1) & 0xF);
19619 assign( new8
, unop(Iop_32to8
, getIReg32(rE
)) );
19621 DIP( "pinsrb $%d,%s,%s\n", imm8
,
19622 nameIReg32(rE
), nameXMMReg(rG
) );
19624 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19625 imm8
= (Int
)(getUChar(delta
+alen
) & 0xF);
19626 assign( new8
, loadLE( Ity_I8
, mkexpr(addr
) ) );
19628 DIP( "pinsrb $%d,%s,%s\n",
19629 imm8
, dis_buf
, nameXMMReg(rG
) );
19631 IRTemp src_vec
= newTemp(Ity_V128
);
19632 assign(src_vec
, getXMMReg( gregOfRexRM(pfx
, modrm
) ));
19633 IRTemp res
= math_PINSRB_128( src_vec
, new8
, imm8
);
19634 putXMMReg( rG
, mkexpr(res
) );
19635 goto decode_success
;
19640 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1
19641 Insert Packed Single Precision Floating-Point Value (XMM) */
19642 if (have66noF2noF3(pfx
) && sz
== 2) {
19644 IRTemp d2ins
= newTemp(Ity_I32
); /* comes from the E part */
19645 const IRTemp inval
= IRTemp_INVALID
;
19647 modrm
= getUChar(delta
);
19648 UInt rG
= gregOfRexRM(pfx
, modrm
);
19650 if ( epartIsReg( modrm
) ) {
19651 UInt rE
= eregOfRexRM(pfx
, modrm
);
19652 IRTemp vE
= newTemp(Ity_V128
);
19653 assign( vE
, getXMMReg(rE
) );
19654 IRTemp dsE
[4] = { inval
, inval
, inval
, inval
};
19655 breakupV128to32s( vE
, &dsE
[3], &dsE
[2], &dsE
[1], &dsE
[0] );
19656 imm8
= getUChar(delta
+1);
19657 d2ins
= dsE
[(imm8
>> 6) & 3]; /* "imm8_count_s" */
19659 DIP( "insertps $%u, %s,%s\n",
19660 imm8
, nameXMMReg(rE
), nameXMMReg(rG
) );
19662 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19663 assign( d2ins
, loadLE( Ity_I32
, mkexpr(addr
) ) );
19664 imm8
= getUChar(delta
+alen
);
19666 DIP( "insertps $%u, %s,%s\n",
19667 imm8
, dis_buf
, nameXMMReg(rG
) );
19670 IRTemp vG
= newTemp(Ity_V128
);
19671 assign( vG
, getXMMReg(rG
) );
19673 putXMMReg( rG
, mkexpr(math_INSERTPS( vG
, d2ins
, imm8
)) );
19674 goto decode_success
;
19679 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8
19680 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */
19681 if (have66noF2noF3(pfx
)
19682 && sz
== 2 /* REX.W is NOT present */) {
19684 IRTemp src_u32
= newTemp(Ity_I32
);
19685 modrm
= getUChar(delta
);
19686 UInt rG
= gregOfRexRM(pfx
, modrm
);
19688 if ( epartIsReg( modrm
) ) {
19689 UInt rE
= eregOfRexRM(pfx
,modrm
);
19690 imm8_10
= (Int
)(getUChar(delta
+1) & 3);
19691 assign( src_u32
, getIReg32( rE
) );
19693 DIP( "pinsrd $%d, %s,%s\n",
19694 imm8_10
, nameIReg32(rE
), nameXMMReg(rG
) );
19696 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19697 imm8_10
= (Int
)(getUChar(delta
+alen
) & 3);
19698 assign( src_u32
, loadLE( Ity_I32
, mkexpr(addr
) ) );
19700 DIP( "pinsrd $%d, %s,%s\n",
19701 imm8_10
, dis_buf
, nameXMMReg(rG
) );
19704 IRTemp src_vec
= newTemp(Ity_V128
);
19705 assign(src_vec
, getXMMReg( rG
));
19706 IRTemp res_vec
= math_PINSRD_128( src_vec
, src_u32
, imm8_10
);
19707 putXMMReg( rG
, mkexpr(res_vec
) );
19708 goto decode_success
;
19710 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8
19711 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */
19712 if (have66noF2noF3(pfx
)
19713 && sz
== 8 /* REX.W is present */) {
19715 IRTemp src_u64
= newTemp(Ity_I64
);
19716 modrm
= getUChar(delta
);
19717 UInt rG
= gregOfRexRM(pfx
, modrm
);
19719 if ( epartIsReg( modrm
) ) {
19720 UInt rE
= eregOfRexRM(pfx
,modrm
);
19721 imm8_0
= (Int
)(getUChar(delta
+1) & 1);
19722 assign( src_u64
, getIReg64( rE
) );
19724 DIP( "pinsrq $%d, %s,%s\n",
19725 imm8_0
, nameIReg64(rE
), nameXMMReg(rG
) );
19727 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
19728 imm8_0
= (Int
)(getUChar(delta
+alen
) & 1);
19729 assign( src_u64
, loadLE( Ity_I64
, mkexpr(addr
) ) );
19731 DIP( "pinsrq $%d, %s,%s\n",
19732 imm8_0
, dis_buf
, nameXMMReg(rG
) );
19735 IRTemp src_vec
= newTemp(Ity_V128
);
19736 assign(src_vec
, getXMMReg( rG
));
19737 IRTemp res_vec
= math_PINSRQ_128( src_vec
, src_u64
, imm8_0
);
19738 putXMMReg( rG
, mkexpr(res_vec
) );
19739 goto decode_success
;
19744 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8
19745 Dot Product of Packed Single Precision Floating-Point Values (XMM) */
19746 if (have66noF2noF3(pfx
) && sz
== 2) {
19747 modrm
= getUChar(delta
);
19749 IRTemp src_vec
= newTemp(Ity_V128
);
19750 IRTemp dst_vec
= newTemp(Ity_V128
);
19751 UInt rG
= gregOfRexRM(pfx
, modrm
);
19752 assign( dst_vec
, getXMMReg( rG
) );
19753 if ( epartIsReg( modrm
) ) {
19754 UInt rE
= eregOfRexRM(pfx
, modrm
);
19755 imm8
= (Int
)getUChar(delta
+1);
19756 assign( src_vec
, getXMMReg(rE
) );
19758 DIP( "dpps $%d, %s,%s\n",
19759 imm8
, nameXMMReg(rE
), nameXMMReg(rG
) );
19761 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19762 1/* imm8 is 1 byte after the amode */ );
19763 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
19764 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19765 imm8
= (Int
)getUChar(delta
+alen
);
19767 DIP( "dpps $%d, %s,%s\n",
19768 imm8
, dis_buf
, nameXMMReg(rG
) );
19770 IRTemp res
= math_DPPS_128( src_vec
, dst_vec
, imm8
);
19771 putXMMReg( rG
, mkexpr(res
) );
19772 goto decode_success
;
19777 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8
19778 Dot Product of Packed Double Precision Floating-Point Values (XMM) */
19779 if (have66noF2noF3(pfx
) && sz
== 2) {
19780 modrm
= getUChar(delta
);
19782 IRTemp src_vec
= newTemp(Ity_V128
);
19783 IRTemp dst_vec
= newTemp(Ity_V128
);
19784 UInt rG
= gregOfRexRM(pfx
, modrm
);
19785 assign( dst_vec
, getXMMReg( rG
) );
19786 if ( epartIsReg( modrm
) ) {
19787 UInt rE
= eregOfRexRM(pfx
, modrm
);
19788 imm8
= (Int
)getUChar(delta
+1);
19789 assign( src_vec
, getXMMReg(rE
) );
19791 DIP( "dppd $%d, %s,%s\n",
19792 imm8
, nameXMMReg(rE
), nameXMMReg(rG
) );
19794 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19795 1/* imm8 is 1 byte after the amode */ );
19796 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
19797 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19798 imm8
= (Int
)getUChar(delta
+alen
);
19800 DIP( "dppd $%d, %s,%s\n",
19801 imm8
, dis_buf
, nameXMMReg(rG
) );
19803 IRTemp res
= math_DPPD_128( src_vec
, dst_vec
, imm8
);
19804 putXMMReg( rG
, mkexpr(res
) );
19805 goto decode_success
;
19810 /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8
19811      Multiple Packed Sums of Absolute Difference (XMM) */
19812 if (have66noF2noF3(pfx
) && sz
== 2) {
19814 IRTemp src_vec
= newTemp(Ity_V128
);
19815 IRTemp dst_vec
= newTemp(Ity_V128
);
19816 modrm
= getUChar(delta
);
19817 UInt rG
= gregOfRexRM(pfx
, modrm
);
19819 assign( dst_vec
, getXMMReg(rG
) );
19821 if ( epartIsReg( modrm
) ) {
19822 UInt rE
= eregOfRexRM(pfx
, modrm
);
19824 imm8
= (Int
)getUChar(delta
+1);
19825 assign( src_vec
, getXMMReg(rE
) );
19827 DIP( "mpsadbw $%d, %s,%s\n", imm8
,
19828 nameXMMReg(rE
), nameXMMReg(rG
) );
19830 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19831 1/* imm8 is 1 byte after the amode */ );
19832 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
19833 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19834 imm8
= (Int
)getUChar(delta
+alen
);
19836 DIP( "mpsadbw $%d, %s,%s\n", imm8
, dis_buf
, nameXMMReg(rG
) );
19839 putXMMReg( rG
, mkexpr( math_MPSADBW_128(dst_vec
, src_vec
, imm8
) ) );
19840 goto decode_success
;
19845 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
19846 * Carry-less multiplication of selected XMM quadwords into XMM
19847 * registers (a.k.a multiplication of polynomials over GF(2))
19849 if (have66noF2noF3(pfx
) && sz
== 2) {
19852 IRTemp svec
= newTemp(Ity_V128
);
19853 IRTemp dvec
= newTemp(Ity_V128
);
19854 modrm
= getUChar(delta
);
19855 UInt rG
= gregOfRexRM(pfx
, modrm
);
19857 assign( dvec
, getXMMReg(rG
) );
19859 if ( epartIsReg( modrm
) ) {
19860 UInt rE
= eregOfRexRM(pfx
, modrm
);
19861 imm8
= (Int
)getUChar(delta
+1);
19862 assign( svec
, getXMMReg(rE
) );
19864 DIP( "pclmulqdq $%d, %s,%s\n", imm8
,
19865 nameXMMReg(rE
), nameXMMReg(rG
) );
19867 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
19868 1/* imm8 is 1 byte after the amode */ );
19869 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
19870 assign( svec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
19871 imm8
= (Int
)getUChar(delta
+alen
);
19873 DIP( "pclmulqdq $%d, %s,%s\n",
19874 imm8
, dis_buf
, nameXMMReg(rG
) );
19877 putXMMReg( rG
, mkexpr( math_PCLMULQDQ(dvec
, svec
, imm8
) ) );
19878 goto decode_success
;
19886 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
19887 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
19888 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
19889 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
19890 (selected special cases that actually occur in glibc,
19891 not by any means a complete implementation.)
19893 if (have66noF2noF3(pfx
) && sz
== 2) {
19894 Long delta0
= delta
;
19895 delta
= dis_PCMPxSTRx( vbi
, pfx
, delta
, False
/*!isAvx*/, opc
);
19896 if (delta
> delta0
) goto decode_success
;
19897 /* else fall though; dis_PCMPxSTRx failed to decode it */
19902 /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */
19903 if (have66noF2noF3(pfx
) && sz
== 2) {
19904 delta
= dis_AESKEYGENASSIST( vbi
, pfx
, delta
, False
/*!isAvx*/ );
19905 goto decode_success
;
19915 *decode_OK
= False
;
19924 /*------------------------------------------------------------*/
19926 /*--- Top-level post-escape decoders: dis_ESC_NONE ---*/
19928 /*------------------------------------------------------------*/
19930 __attribute__((noinline
))
19932 Long
dis_ESC_NONE (
19933 /*MB_OUT*/DisResult
* dres
,
19934 /*MB_OUT*/Bool
* expect_CAS
,
19935 const VexArchInfo
* archinfo
,
19936 const VexAbiInfo
* vbi
,
19937 Prefix pfx
, Int sz
, Long deltaIN
19942 IRTemp addr
= IRTemp_INVALID
;
19943 IRTemp t1
= IRTemp_INVALID
;
19944 IRTemp t2
= IRTemp_INVALID
;
19945 IRTemp t3
= IRTemp_INVALID
;
19946 IRTemp t4
= IRTemp_INVALID
;
19947 IRTemp t5
= IRTemp_INVALID
;
19948 IRType ty
= Ity_INVALID
;
19955 Long delta
= deltaIN
;
19956 UChar opc
= getUChar(delta
); delta
++;
19958 /* delta now points at the modrm byte. In most of the cases that
19959 follow, neither the F2 nor F3 prefixes are allowed. However,
19960 for some basic arithmetic operations we have to allow F2/XACQ or
19961 F3/XREL in the case where the destination is memory and the LOCK
19962 prefix is also present. Do this check by looking at the modrm
19963 byte but not advancing delta over it. */
19964 /* By default, F2 and F3 are not allowed, so let's start off with
19966 Bool validF2orF3
= haveF2orF3(pfx
) ? False
: True
;
19967 { UChar tmp_modrm
= getUChar(delta
);
19969 case 0x00: /* ADD Gb,Eb */ case 0x01: /* ADD Gv,Ev */
19970 case 0x08: /* OR Gb,Eb */ case 0x09: /* OR Gv,Ev */
19971 case 0x10: /* ADC Gb,Eb */ case 0x11: /* ADC Gv,Ev */
19972 case 0x18: /* SBB Gb,Eb */ case 0x19: /* SBB Gv,Ev */
19973 case 0x20: /* AND Gb,Eb */ case 0x21: /* AND Gv,Ev */
19974 case 0x28: /* SUB Gb,Eb */ case 0x29: /* SUB Gv,Ev */
19975 case 0x30: /* XOR Gb,Eb */ case 0x31: /* XOR Gv,Ev */
19976 if (!epartIsReg(tmp_modrm
)
19977 && haveF2orF3(pfx
) && !haveF2andF3(pfx
) && haveLOCK(pfx
)) {
19978 /* dst is mem, and we have F2 or F3 but not both */
19979 validF2orF3
= True
;
19987 /* Now, in the switch below, for the opc values examined by the
19988 switch above, use validF2orF3 rather than looking at pfx
19992 case 0x00: /* ADD Gb,Eb */
19993 if (!validF2orF3
) goto decode_failure
;
19994 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, 1, delta
, "add" );
19996 case 0x01: /* ADD Gv,Ev */
19997 if (!validF2orF3
) goto decode_failure
;
19998 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, sz
, delta
, "add" );
20001 case 0x02: /* ADD Eb,Gb */
20002 if (haveF2orF3(pfx
)) goto decode_failure
;
20003 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, 1, delta
, "add" );
20005 case 0x03: /* ADD Ev,Gv */
20006 if (haveF2orF3(pfx
)) goto decode_failure
;
20007 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagNone
, True
, sz
, delta
, "add" );
20010 case 0x04: /* ADD Ib, AL */
20011 if (haveF2orF3(pfx
)) goto decode_failure
;
20012 delta
= dis_op_imm_A( 1, False
, Iop_Add8
, True
, delta
, "add" );
20014 case 0x05: /* ADD Iv, eAX */
20015 if (haveF2orF3(pfx
)) goto decode_failure
;
20016 delta
= dis_op_imm_A(sz
, False
, Iop_Add8
, True
, delta
, "add" );
20019 case 0x08: /* OR Gb,Eb */
20020 if (!validF2orF3
) goto decode_failure
;
20021 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, 1, delta
, "or" );
20023 case 0x09: /* OR Gv,Ev */
20024 if (!validF2orF3
) goto decode_failure
;
20025 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, sz
, delta
, "or" );
20028 case 0x0A: /* OR Eb,Gb */
20029 if (haveF2orF3(pfx
)) goto decode_failure
;
20030 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, 1, delta
, "or" );
20032 case 0x0B: /* OR Ev,Gv */
20033 if (haveF2orF3(pfx
)) goto decode_failure
;
20034 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Or8
, WithFlagNone
, True
, sz
, delta
, "or" );
20037 case 0x0C: /* OR Ib, AL */
20038 if (haveF2orF3(pfx
)) goto decode_failure
;
20039 delta
= dis_op_imm_A( 1, False
, Iop_Or8
, True
, delta
, "or" );
20041 case 0x0D: /* OR Iv, eAX */
20042 if (haveF2orF3(pfx
)) goto decode_failure
;
20043 delta
= dis_op_imm_A( sz
, False
, Iop_Or8
, True
, delta
, "or" );
20046 case 0x10: /* ADC Gb,Eb */
20047 if (!validF2orF3
) goto decode_failure
;
20048 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, 1, delta
, "adc" );
20050 case 0x11: /* ADC Gv,Ev */
20051 if (!validF2orF3
) goto decode_failure
;
20052 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, sz
, delta
, "adc" );
20055 case 0x12: /* ADC Eb,Gb */
20056 if (haveF2orF3(pfx
)) goto decode_failure
;
20057 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, 1, delta
, "adc" );
20059 case 0x13: /* ADC Ev,Gv */
20060 if (haveF2orF3(pfx
)) goto decode_failure
;
20061 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagCarry
, True
, sz
, delta
, "adc" );
20064 case 0x14: /* ADC Ib, AL */
20065 if (haveF2orF3(pfx
)) goto decode_failure
;
20066 delta
= dis_op_imm_A( 1, True
, Iop_Add8
, True
, delta
, "adc" );
20068 case 0x15: /* ADC Iv, eAX */
20069 if (haveF2orF3(pfx
)) goto decode_failure
;
20070 delta
= dis_op_imm_A( sz
, True
, Iop_Add8
, True
, delta
, "adc" );
20073 case 0x18: /* SBB Gb,Eb */
20074 if (!validF2orF3
) goto decode_failure
;
20075 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, 1, delta
, "sbb" );
20077 case 0x19: /* SBB Gv,Ev */
20078 if (!validF2orF3
) goto decode_failure
;
20079 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, sz
, delta
, "sbb" );
20082 case 0x1A: /* SBB Eb,Gb */
20083 if (haveF2orF3(pfx
)) goto decode_failure
;
20084 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, 1, delta
, "sbb" );
20086 case 0x1B: /* SBB Ev,Gv */
20087 if (haveF2orF3(pfx
)) goto decode_failure
;
20088 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagCarry
, True
, sz
, delta
, "sbb" );
20091 case 0x1C: /* SBB Ib, AL */
20092 if (haveF2orF3(pfx
)) goto decode_failure
;
20093 delta
= dis_op_imm_A( 1, True
, Iop_Sub8
, True
, delta
, "sbb" );
20095 case 0x1D: /* SBB Iv, eAX */
20096 if (haveF2orF3(pfx
)) goto decode_failure
;
20097 delta
= dis_op_imm_A( sz
, True
, Iop_Sub8
, True
, delta
, "sbb" );
20100 case 0x20: /* AND Gb,Eb */
20101 if (!validF2orF3
) goto decode_failure
;
20102 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, 1, delta
, "and" );
20104 case 0x21: /* AND Gv,Ev */
20105 if (!validF2orF3
) goto decode_failure
;
20106 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, sz
, delta
, "and" );
20109 case 0x22: /* AND Eb,Gb */
20110 if (haveF2orF3(pfx
)) goto decode_failure
;
20111 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, 1, delta
, "and" );
20113 case 0x23: /* AND Ev,Gv */
20114 if (haveF2orF3(pfx
)) goto decode_failure
;
20115 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_And8
, WithFlagNone
, True
, sz
, delta
, "and" );
20118 case 0x24: /* AND Ib, AL */
20119 if (haveF2orF3(pfx
)) goto decode_failure
;
20120 delta
= dis_op_imm_A( 1, False
, Iop_And8
, True
, delta
, "and" );
20122 case 0x25: /* AND Iv, eAX */
20123 if (haveF2orF3(pfx
)) goto decode_failure
;
20124 delta
= dis_op_imm_A( sz
, False
, Iop_And8
, True
, delta
, "and" );
20127 case 0x28: /* SUB Gb,Eb */
20128 if (!validF2orF3
) goto decode_failure
;
20129 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, 1, delta
, "sub" );
20131 case 0x29: /* SUB Gv,Ev */
20132 if (!validF2orF3
) goto decode_failure
;
20133 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, sz
, delta
, "sub" );
20136 case 0x2A: /* SUB Eb,Gb */
20137 if (haveF2orF3(pfx
)) goto decode_failure
;
20138 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, 1, delta
, "sub" );
20140 case 0x2B: /* SUB Ev,Gv */
20141 if (haveF2orF3(pfx
)) goto decode_failure
;
20142 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, True
, sz
, delta
, "sub" );
20145 case 0x2C: /* SUB Ib, AL */
20146 if (haveF2orF3(pfx
)) goto decode_failure
;
20147 delta
= dis_op_imm_A(1, False
, Iop_Sub8
, True
, delta
, "sub" );
20149 case 0x2D: /* SUB Iv, eAX */
20150 if (haveF2orF3(pfx
)) goto decode_failure
;
20151 delta
= dis_op_imm_A( sz
, False
, Iop_Sub8
, True
, delta
, "sub" );
20154 case 0x30: /* XOR Gb,Eb */
20155 if (!validF2orF3
) goto decode_failure
;
20156 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, 1, delta
, "xor" );
20158 case 0x31: /* XOR Gv,Ev */
20159 if (!validF2orF3
) goto decode_failure
;
20160 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, sz
, delta
, "xor" );
20163 case 0x32: /* XOR Eb,Gb */
20164 if (haveF2orF3(pfx
)) goto decode_failure
;
20165 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, 1, delta
, "xor" );
20167 case 0x33: /* XOR Ev,Gv */
20168 if (haveF2orF3(pfx
)) goto decode_failure
;
20169 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Xor8
, WithFlagNone
, True
, sz
, delta
, "xor" );
20172 case 0x34: /* XOR Ib, AL */
20173 if (haveF2orF3(pfx
)) goto decode_failure
;
20174 delta
= dis_op_imm_A( 1, False
, Iop_Xor8
, True
, delta
, "xor" );
20176 case 0x35: /* XOR Iv, eAX */
20177 if (haveF2orF3(pfx
)) goto decode_failure
;
20178 delta
= dis_op_imm_A( sz
, False
, Iop_Xor8
, True
, delta
, "xor" );
20181 case 0x38: /* CMP Gb,Eb */
20182 if (haveF2orF3(pfx
)) goto decode_failure
;
20183 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, 1, delta
, "cmp" );
20185 case 0x39: /* CMP Gv,Ev */
20186 if (haveF2orF3(pfx
)) goto decode_failure
;
20187 delta
= dis_op2_G_E ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, sz
, delta
, "cmp" );
20190 case 0x3A: /* CMP Eb,Gb */
20191 if (haveF2orF3(pfx
)) goto decode_failure
;
20192 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, 1, delta
, "cmp" );
20194 case 0x3B: /* CMP Ev,Gv */
20195 if (haveF2orF3(pfx
)) goto decode_failure
;
20196 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Sub8
, WithFlagNone
, False
, sz
, delta
, "cmp" );
20199 case 0x3C: /* CMP Ib, AL */
20200 if (haveF2orF3(pfx
)) goto decode_failure
;
20201 delta
= dis_op_imm_A( 1, False
, Iop_Sub8
, False
, delta
, "cmp" );
20203 case 0x3D: /* CMP Iv, eAX */
20204 if (haveF2orF3(pfx
)) goto decode_failure
;
20205 delta
= dis_op_imm_A( sz
, False
, Iop_Sub8
, False
, delta
, "cmp" );
20208 case 0x50: /* PUSH eAX */
20209 case 0x51: /* PUSH eCX */
20210 case 0x52: /* PUSH eDX */
20211 case 0x53: /* PUSH eBX */
20212 case 0x55: /* PUSH eBP */
20213 case 0x56: /* PUSH eSI */
20214 case 0x57: /* PUSH eDI */
20215 case 0x54: /* PUSH eSP */
20216 /* This is the Right Way, in that the value to be pushed is
20217 established before %rsp is changed, so that pushq %rsp
20218 correctly pushes the old value. */
20219 if (haveF2orF3(pfx
)) goto decode_failure
;
20220 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20222 sz
= 8; /* there is no encoding for 32-bit push in 64-bit mode */
20223 ty
= sz
==2 ? Ity_I16
: Ity_I64
;
20225 t2
= newTemp(Ity_I64
);
20226 assign(t1
, getIRegRexB(sz
, pfx
, opc
-0x50));
20227 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(sz
)));
20228 putIReg64(R_RSP
, mkexpr(t2
) );
20229 storeLE(mkexpr(t2
),mkexpr(t1
));
20230 DIP("push%c %s\n", nameISize(sz
), nameIRegRexB(sz
,pfx
,opc
-0x50));
20233 case 0x58: /* POP eAX */
20234 case 0x59: /* POP eCX */
20235 case 0x5A: /* POP eDX */
20236 case 0x5B: /* POP eBX */
20237 case 0x5D: /* POP eBP */
20238 case 0x5E: /* POP eSI */
20239 case 0x5F: /* POP eDI */
20240 case 0x5C: /* POP eSP */
20241 if (haveF2orF3(pfx
)) goto decode_failure
;
20242 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20244 sz
= 8; /* there is no encoding for 32-bit pop in 64-bit mode */
20245 t1
= newTemp(szToITy(sz
));
20246 t2
= newTemp(Ity_I64
);
20247 assign(t2
, getIReg64(R_RSP
));
20248 assign(t1
, loadLE(szToITy(sz
),mkexpr(t2
)));
20249 putIReg64(R_RSP
, binop(Iop_Add64
, mkexpr(t2
), mkU64(sz
)));
20250 putIRegRexB(sz
, pfx
, opc
-0x58, mkexpr(t1
));
20251 DIP("pop%c %s\n", nameISize(sz
), nameIRegRexB(sz
,pfx
,opc
-0x58));
20254 case 0x63: /* MOVSX */
20255 if (haveF2orF3(pfx
)) goto decode_failure
;
20256 if (haveREX(pfx
) && 1==getRexW(pfx
)) {
20258 /* movsx r/m32 to r64 */
20259 modrm
= getUChar(delta
);
20260 if (epartIsReg(modrm
)) {
20262 putIRegG(8, pfx
, modrm
,
20264 getIRegE(4, pfx
, modrm
)));
20265 DIP("movslq %s,%s\n",
20266 nameIRegE(4, pfx
, modrm
),
20267 nameIRegG(8, pfx
, modrm
));
20270 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
20272 putIRegG(8, pfx
, modrm
,
20274 loadLE(Ity_I32
, mkexpr(addr
))));
20275 DIP("movslq %s,%s\n", dis_buf
,
20276 nameIRegG(8, pfx
, modrm
));
20280 goto decode_failure
;
20283 case 0x68: /* PUSH Iv */
20284 if (haveF2orF3(pfx
)) goto decode_failure
;
20285 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20286 if (sz
== 4) sz
= 8;
20287 d64
= getSDisp(imin(4,sz
),delta
);
20288 delta
+= imin(4,sz
);
20291 case 0x69: /* IMUL Iv, Ev, Gv */
20292 if (haveF2orF3(pfx
)) goto decode_failure
;
20293 delta
= dis_imul_I_E_G ( vbi
, pfx
, sz
, delta
, sz
);
20296 case 0x6A: /* PUSH Ib, sign-extended to sz */
20297 if (haveF2orF3(pfx
)) goto decode_failure
;
20298 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20299 if (sz
== 4) sz
= 8;
20300 d64
= getSDisp8(delta
); delta
+= 1;
20304 t1
= newTemp(Ity_I64
);
20306 assign( t1
, binop(Iop_Sub64
,getIReg64(R_RSP
),mkU64(sz
)) );
20307 putIReg64(R_RSP
, mkexpr(t1
) );
20308 /* stop mkU16 asserting if d32 is a negative 16-bit number
20312 storeLE( mkexpr(t1
), mkU(ty
,d64
) );
20313 DIP("push%c $%lld\n", nameISize(sz
), (Long
)d64
);
20316 case 0x6B: /* IMUL Ib, Ev, Gv */
20317 delta
= dis_imul_I_E_G ( vbi
, pfx
, sz
, delta
, 1 );
20322 case 0x72: /* JBb/JNAEb (jump below) */
20323 case 0x73: /* JNBb/JAEb (jump not below) */
20324 case 0x74: /* JZb/JEb (jump zero) */
20325 case 0x75: /* JNZb/JNEb (jump not zero) */
20326 case 0x76: /* JBEb/JNAb (jump below or equal) */
20327 case 0x77: /* JNBEb/JAb (jump not below or equal) */
20328 case 0x78: /* JSb (jump negative) */
20329 case 0x79: /* JSb (jump not negative) */
20330 case 0x7A: /* JP (jump parity even) */
20331 case 0x7B: /* JNP/JPO (jump parity odd) */
20332 case 0x7C: /* JLb/JNGEb (jump less) */
20333 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
20334 case 0x7E: /* JLEb/JNGb (jump less or equal) */
20335 case 0x7F: { /* JGb/JNLEb (jump greater) */
20337 const HChar
* comment
= "";
20338 if (haveF3(pfx
)) goto decode_failure
;
20339 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
20340 jmpDelta
= getSDisp8(delta
);
20341 vassert(-128 <= jmpDelta
&& jmpDelta
< 128);
20342 d64
= (guest_RIP_bbstart
+delta
+1) + jmpDelta
;
20344 /* End the block at this point. */
20345 jcc_01( dres
, (AMD64Condcode
)(opc
- 0x70),
20346 guest_RIP_bbstart
+delta
, d64
);
20347 vassert(dres
->whatNext
== Dis_StopHere
);
20348 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc
- 0x70), (ULong
)d64
,
20353 case 0x80: /* Grp1 Ib,Eb */
20354 modrm
= getUChar(delta
);
20355 /* Disallow F2/XACQ and F3/XREL for the non-mem case. Allow
20356 just one for the mem case and also require LOCK in this case.
20357 Note that this erroneously allows XACQ/XREL on CMP since we
20358 don't check the subopcode here. No big deal. */
20359 if (epartIsReg(modrm
) && haveF2orF3(pfx
))
20360 goto decode_failure
;
20361 if (!epartIsReg(modrm
) && haveF2andF3(pfx
))
20362 goto decode_failure
;
20363 if (!epartIsReg(modrm
) && haveF2orF3(pfx
) && !haveLOCK(pfx
))
20364 goto decode_failure
;
20365 am_sz
= lengthAMode(pfx
,delta
);
20368 d64
= getSDisp8(delta
+ am_sz
);
20369 delta
= dis_Grp1 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
, d64
);
20372 case 0x81: /* Grp1 Iv,Ev */
20373 modrm
= getUChar(delta
);
20374 /* Same comment as for case 0x80 just above. */
20375 if (epartIsReg(modrm
) && haveF2orF3(pfx
))
20376 goto decode_failure
;
20377 if (!epartIsReg(modrm
) && haveF2andF3(pfx
))
20378 goto decode_failure
;
20379 if (!epartIsReg(modrm
) && haveF2orF3(pfx
) && !haveLOCK(pfx
))
20380 goto decode_failure
;
20381 am_sz
= lengthAMode(pfx
,delta
);
20383 d64
= getSDisp(d_sz
, delta
+ am_sz
);
20384 delta
= dis_Grp1 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
, d64
);
20387 case 0x83: /* Grp1 Ib,Ev */
20388 if (haveF2orF3(pfx
)) goto decode_failure
;
20389 modrm
= getUChar(delta
);
20390 am_sz
= lengthAMode(pfx
,delta
);
20392 d64
= getSDisp8(delta
+ am_sz
);
20393 delta
= dis_Grp1 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
, d64
);
20396 case 0x84: /* TEST Eb,Gb */
20397 if (haveF2orF3(pfx
)) goto decode_failure
;
20398 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_And8
, WithFlagNone
, False
,
20399 1, delta
, "test" );
20402 case 0x85: /* TEST Ev,Gv */
20403 if (haveF2orF3(pfx
)) goto decode_failure
;
20404 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_And8
, WithFlagNone
, False
,
20405 sz
, delta
, "test" );
20408 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
20409 prefix. Therefore, generate CAS regardless of the presence or
20410 otherwise of a LOCK prefix. */
20411 case 0x86: /* XCHG Gb,Eb */
20413 /* Fall through ... */
20414 case 0x87: /* XCHG Gv,Ev */
20415 modrm
= getUChar(delta
);
20416 /* Check whether F2 or F3 are allowable. For the mem case, one
20417         or the other but not both are.  We don't care about the
20418 presence of LOCK in this case -- XCHG is unusual in this
20420 if (haveF2orF3(pfx
)) {
20421 if (epartIsReg(modrm
)) {
20422 goto decode_failure
;
20424 if (haveF2andF3(pfx
))
20425 goto decode_failure
;
20429 t1
= newTemp(ty
); t2
= newTemp(ty
);
20430 if (epartIsReg(modrm
)) {
20431 assign(t1
, getIRegE(sz
, pfx
, modrm
));
20432 assign(t2
, getIRegG(sz
, pfx
, modrm
));
20433 putIRegG(sz
, pfx
, modrm
, mkexpr(t1
));
20434 putIRegE(sz
, pfx
, modrm
, mkexpr(t2
));
20436 DIP("xchg%c %s, %s\n",
20437 nameISize(sz
), nameIRegG(sz
, pfx
, modrm
),
20438 nameIRegE(sz
, pfx
, modrm
));
20440 *expect_CAS
= True
;
20441 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
20442 assign( t1
, loadLE(ty
, mkexpr(addr
)) );
20443 assign( t2
, getIRegG(sz
, pfx
, modrm
) );
20444 casLE( mkexpr(addr
),
20445 mkexpr(t1
), mkexpr(t2
), guest_RIP_curr_instr
);
20446 putIRegG( sz
, pfx
, modrm
, mkexpr(t1
) );
20448 DIP("xchg%c %s, %s\n", nameISize(sz
),
20449 nameIRegG(sz
, pfx
, modrm
), dis_buf
);
20453 case 0x88: { /* MOV Gb,Eb */
20454 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
20456 delta
= dis_mov_G_E(vbi
, pfx
, 1, delta
, &ok
);
20457 if (!ok
) goto decode_failure
;
20461 case 0x89: { /* MOV Gv,Ev */
20462 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
20464 delta
= dis_mov_G_E(vbi
, pfx
, sz
, delta
, &ok
);
20465 if (!ok
) goto decode_failure
;
20469 case 0x8A: /* MOV Eb,Gb */
20470 if (haveF2orF3(pfx
)) goto decode_failure
;
20471 delta
= dis_mov_E_G(vbi
, pfx
, 1, delta
);
20474 case 0x8B: /* MOV Ev,Gv */
20475 if (haveF2orF3(pfx
)) goto decode_failure
;
20476 delta
= dis_mov_E_G(vbi
, pfx
, sz
, delta
);
20479 case 0x8C: /* MOV S,E -- MOV from a SEGMENT REGISTER */
20480 if (haveF2orF3(pfx
)) goto decode_failure
;
20481 delta
= dis_mov_S_E(vbi
, pfx
, sz
, delta
);
20484 case 0x8D: /* LEA M,Gv */
20485 if (haveF2orF3(pfx
)) goto decode_failure
;
20486 if (sz
!= 4 && sz
!= 8)
20487 goto decode_failure
;
20488 modrm
= getUChar(delta
);
20489 if (epartIsReg(modrm
))
20490 goto decode_failure
;
20491 /* NOTE! this is the one place where a segment override prefix
20492 has no effect on the address calculation. Therefore we clear
20493 any segment override bits in pfx. */
20494 addr
= disAMode ( &alen
, vbi
, clearSegBits(pfx
), delta
, dis_buf
, 0 );
20496 /* This is a hack. But it isn't clear that really doing the
20497 calculation at 32 bits is really worth it. Hence for leal,
20498 do the full 64-bit calculation and then truncate it. */
20499 putIRegG( sz
, pfx
, modrm
,
20501 ? unop(Iop_64to32
, mkexpr(addr
))
20504 DIP("lea%c %s, %s\n", nameISize(sz
), dis_buf
,
20505 nameIRegG(sz
,pfx
,modrm
));
20508 case 0x8F: { /* POPQ m64 / POPW m16 */
20511 /* There is no encoding for 32-bit pop in 64-bit mode.
20512 So sz==4 actually means sz==8. */
20513 if (haveF2orF3(pfx
)) goto decode_failure
;
20514 vassert(sz
== 2 || sz
== 4
20515 || /* tolerate redundant REX.W, see #210481 */ sz
== 8);
20516 if (sz
== 4) sz
= 8;
20517 if (sz
!= 8) goto decode_failure
; // until we know a sz==2 test case exists
20519 rm
= getUChar(delta
);
20521 /* make sure this instruction is correct POP */
20522 if (epartIsReg(rm
) || gregLO3ofRM(rm
) != 0)
20523 goto decode_failure
;
20524 /* and has correct size */
20527 t1
= newTemp(Ity_I64
);
20528 t3
= newTemp(Ity_I64
);
20529 assign( t1
, getIReg64(R_RSP
) );
20530 assign( t3
, loadLE(Ity_I64
, mkexpr(t1
)) );
20532 /* Increase RSP; must be done before the STORE. Intel manual
20533 says: If the RSP register is used as a base register for
20534 addressing a destination operand in memory, the POP
20535 instruction computes the effective address of the operand
20536 after it increments the RSP register. */
20537 putIReg64(R_RSP
, binop(Iop_Add64
, mkexpr(t1
), mkU64(sz
)) );
20539 addr
= disAMode ( &len
, vbi
, pfx
, delta
, dis_buf
, 0 );
20540 storeLE( mkexpr(addr
), mkexpr(t3
) );
20542 DIP("popl %s\n", dis_buf
);
20548 case 0x90: /* XCHG eAX,eAX */
20549 /* detect and handle F3 90 (rep nop) specially */
20550 if (!have66(pfx
) && !haveF2(pfx
) && haveF3(pfx
)) {
20551 DIP("rep nop (P4 pause)\n");
20552 /* "observe" the hint. The Vex client needs to be careful not
20553 to cause very long delays as a result, though. */
20554 jmp_lit(dres
, Ijk_Yield
, guest_RIP_bbstart
+delta
);
20555 vassert(dres
->whatNext
== Dis_StopHere
);
20558 /* detect and handle NOPs specially */
20559 if (/* F2/F3 probably change meaning completely */
20561 /* If REX.B is 1, we're not exchanging rAX with itself */
20562 && getRexB(pfx
)==0 ) {
20566 /* else fall through to normal case. */
20567 case 0x91: /* XCHG rAX,rCX */
20568 case 0x92: /* XCHG rAX,rDX */
20569 case 0x93: /* XCHG rAX,rBX */
20570 case 0x94: /* XCHG rAX,rSP */
20571 case 0x95: /* XCHG rAX,rBP */
20572 case 0x96: /* XCHG rAX,rSI */
20573 case 0x97: /* XCHG rAX,rDI */
20574 /* guard against mutancy */
20575 if (haveF2orF3(pfx
)) goto decode_failure
;
20576 codegen_xchg_rAX_Reg ( pfx
, sz
, opc
- 0x90 );
20579 case 0x98: /* CBW */
20580 if (haveF2orF3(pfx
)) goto decode_failure
;
20582 putIRegRAX( 8, unop(Iop_32Sto64
, getIRegRAX(4)) );
20583 DIP(/*"cdqe\n"*/"cltq\n");
20587 putIRegRAX( 4, unop(Iop_16Sto32
, getIRegRAX(2)) );
20592 putIRegRAX( 2, unop(Iop_8Sto16
, getIRegRAX(1)) );
20596 goto decode_failure
;
20598 case 0x99: /* CWD/CDQ/CQO */
20599 if (haveF2orF3(pfx
)) goto decode_failure
;
20600 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20603 binop(mkSizedOp(ty
,Iop_Sar8
),
20605 mkU8(sz
== 2 ? 15 : (sz
== 4 ? 31 : 63))) );
20606 DIP(sz
== 2 ? "cwd\n"
20607 : (sz
== 4 ? /*"cdq\n"*/ "cltd\n"
20611 case 0x9B: /* FWAIT (X87 insn) */
20616 case 0x9C: /* PUSHF */ {
20617 /* Note. There is no encoding for a 32-bit pushf in 64-bit
20618 mode. So sz==4 actually means sz==8. */
20619 /* 24 July 06: has also been seen with a redundant REX prefix,
20620 so must also allow sz==8. */
20621 if (haveF2orF3(pfx
)) goto decode_failure
;
20622 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20623 if (sz
== 4) sz
= 8;
20624 if (sz
!= 8) goto decode_failure
; // until we know a sz==2 test case exists
20626 t1
= newTemp(Ity_I64
);
20627 assign( t1
, binop(Iop_Sub64
,getIReg64(R_RSP
),mkU64(sz
)) );
20628 putIReg64(R_RSP
, mkexpr(t1
) );
20630 t2
= newTemp(Ity_I64
);
20631 assign( t2
, mk_amd64g_calculate_rflags_all() );
20633 /* Patch in the D flag. This can simply be a copy of bit 10 of
20634 baseBlock[OFFB_DFLAG]. */
20635 t3
= newTemp(Ity_I64
);
20636 assign( t3
, binop(Iop_Or64
,
20639 IRExpr_Get(OFFB_DFLAG
,Ity_I64
),
20643 /* And patch in the ID flag. */
20644 t4
= newTemp(Ity_I64
);
20645 assign( t4
, binop(Iop_Or64
,
20648 binop(Iop_Shl64
, IRExpr_Get(OFFB_IDFLAG
,Ity_I64
),
20653 /* And patch in the AC flag too. */
20654 t5
= newTemp(Ity_I64
);
20655 assign( t5
, binop(Iop_Or64
,
20658 binop(Iop_Shl64
, IRExpr_Get(OFFB_ACFLAG
,Ity_I64
),
20663 /* if sz==2, the stored value needs to be narrowed. */
20665 storeLE( mkexpr(t1
), unop(Iop_32to16
,
20666 unop(Iop_64to32
,mkexpr(t5
))) );
20668 storeLE( mkexpr(t1
), mkexpr(t5
) );
20670 DIP("pushf%c\n", nameISize(sz
));
20674 case 0x9D: /* POPF */
20675 /* Note. There is no encoding for a 32-bit popf in 64-bit mode.
20676 So sz==4 actually means sz==8. */
20677 if (haveF2orF3(pfx
)) goto decode_failure
;
20678 vassert(sz
== 2 || sz
== 4 || sz
== 8);
20679 if (sz
== 4) sz
= 8;
20680 if (sz
!= 8) goto decode_failure
; // until we know a sz==2 test case exists
20681 t1
= newTemp(Ity_I64
); t2
= newTemp(Ity_I64
);
20682 assign(t2
, getIReg64(R_RSP
));
20683 assign(t1
, widenUto64(loadLE(szToITy(sz
),mkexpr(t2
))));
20684 putIReg64(R_RSP
, binop(Iop_Add64
, mkexpr(t2
), mkU64(sz
)));
20685 /* t1 is the flag word. Mask out everything except OSZACP and
20686 set the flags thunk to AMD64G_CC_OP_COPY. */
20687 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
20688 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
20689 stmt( IRStmt_Put( OFFB_CC_DEP1
,
20692 mkU64( AMD64G_CC_MASK_C
| AMD64G_CC_MASK_P
20693 | AMD64G_CC_MASK_A
| AMD64G_CC_MASK_Z
20694 | AMD64G_CC_MASK_S
| AMD64G_CC_MASK_O
)
20698 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
20700 /* Also need to set the D flag, which is held in bit 10 of t1.
20701 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
20707 binop(Iop_Shr64
, mkexpr(t1
), mkU8(10)),
20709 mkU64(0xFFFFFFFFFFFFFFFFULL
),
20713 /* And set the ID flag */
20719 binop(Iop_Shr64
, mkexpr(t1
), mkU8(21)),
20725 /* And set the AC flag too */
20731 binop(Iop_Shr64
, mkexpr(t1
), mkU8(18)),
20737 DIP("popf%c\n", nameISize(sz
));
20740 case 0x9E: /* SAHF */
20745 case 0x9F: /* LAHF */
20750 case 0xA0: /* MOV Ob,AL */
20751 if (have66orF2orF3(pfx
)) goto decode_failure
;
20753 /* Fall through ... */
20754 case 0xA1: /* MOV Ov,eAX */
20755 if (sz
!= 8 && sz
!= 4 && sz
!= 2 && sz
!= 1)
20756 goto decode_failure
;
20757 d64
= getDisp64(delta
);
20760 addr
= newTemp(Ity_I64
);
20761 assign( addr
, handleAddrOverrides(vbi
, pfx
, mkU64(d64
)) );
20762 putIRegRAX(sz
, loadLE( ty
, mkexpr(addr
) ));
20763 DIP("mov%c %s0x%llx, %s\n", nameISize(sz
),
20764 segRegTxt(pfx
), (ULong
)d64
,
20768 case 0xA2: /* MOV AL,Ob */
20769 if (have66orF2orF3(pfx
)) goto decode_failure
;
20771 /* Fall through ... */
20772 case 0xA3: /* MOV eAX,Ov */
20773 if (sz
!= 8 && sz
!= 4 && sz
!= 2 && sz
!= 1)
20774 goto decode_failure
;
20775 d64
= getDisp64(delta
);
20778 addr
= newTemp(Ity_I64
);
20779 assign( addr
, handleAddrOverrides(vbi
, pfx
, mkU64(d64
)) );
20780 storeLE( mkexpr(addr
), getIRegRAX(sz
) );
20781 DIP("mov%c %s, %s0x%llx\n", nameISize(sz
), nameIRegRAX(sz
),
20782 segRegTxt(pfx
), (ULong
)d64
);
20787 /* F3 A4: rep movsb */
20788 if (haveF3(pfx
) && !haveF2(pfx
)) {
20791 dis_REP_op ( dres
, AMD64CondAlways
, dis_MOVS
, sz
,
20792 guest_RIP_curr_instr
,
20793 guest_RIP_bbstart
+delta
, "rep movs", pfx
);
20794 dres
->whatNext
= Dis_StopHere
;
20798 if (!haveF3(pfx
) && !haveF2(pfx
)) {
20801 dis_string_op( dis_MOVS
, sz
, "movs", pfx
);
20804 goto decode_failure
;
20808 /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */
20809 if (haveF3(pfx
) && !haveF2(pfx
)) {
20812 dis_REP_op ( dres
, AMD64CondZ
, dis_CMPS
, sz
,
20813 guest_RIP_curr_instr
,
20814 guest_RIP_bbstart
+delta
, "repe cmps", pfx
);
20815 dres
->whatNext
= Dis_StopHere
;
20818 goto decode_failure
;
20822 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
20823 if (haveF3(pfx
) && !haveF2(pfx
)) {
20826 dis_REP_op ( dres
, AMD64CondAlways
, dis_STOS
, sz
,
20827 guest_RIP_curr_instr
,
20828 guest_RIP_bbstart
+delta
, "rep stos", pfx
);
20829 vassert(dres
->whatNext
== Dis_StopHere
);
20832 /* AA/AB: stosb/stos{w,l,q} */
20833 if (!haveF3(pfx
) && !haveF2(pfx
)) {
20836 dis_string_op( dis_STOS
, sz
, "stos", pfx
);
20839 goto decode_failure
;
20841 case 0xA8: /* TEST Ib, AL */
20842 if (haveF2orF3(pfx
)) goto decode_failure
;
20843 delta
= dis_op_imm_A( 1, False
, Iop_And8
, False
, delta
, "test" );
20845 case 0xA9: /* TEST Iv, eAX */
20846 if (haveF2orF3(pfx
)) goto decode_failure
;
20847 delta
= dis_op_imm_A( sz
, False
, Iop_And8
, False
, delta
, "test" );
20850 case 0xAC: /* LODS, no REP prefix */
20852 dis_string_op( dis_LODS
, ( opc
== 0xAC ? 1 : sz
), "lods", pfx
);
20857 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
20858 if (haveF2(pfx
) && !haveF3(pfx
)) {
20861 dis_REP_op ( dres
, AMD64CondNZ
, dis_SCAS
, sz
,
20862 guest_RIP_curr_instr
,
20863 guest_RIP_bbstart
+delta
, "repne scas", pfx
);
20864 vassert(dres
->whatNext
== Dis_StopHere
);
20867 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
20868 if (!haveF2(pfx
) && haveF3(pfx
)) {
20871 dis_REP_op ( dres
, AMD64CondZ
, dis_SCAS
, sz
,
20872 guest_RIP_curr_instr
,
20873 guest_RIP_bbstart
+delta
, "repe scas", pfx
);
20874 vassert(dres
->whatNext
== Dis_StopHere
);
20877 /* AE/AF: scasb/scas{w,l,q} */
20878 if (!haveF2(pfx
) && !haveF3(pfx
)) {
20881 dis_string_op( dis_SCAS
, sz
, "scas", pfx
);
20884 goto decode_failure
;
20886 /* XXXX be careful here with moves to AH/BH/CH/DH */
20887 case 0xB0: /* MOV imm,AL */
20888 case 0xB1: /* MOV imm,CL */
20889 case 0xB2: /* MOV imm,DL */
20890 case 0xB3: /* MOV imm,BL */
20891 case 0xB4: /* MOV imm,AH */
20892 case 0xB5: /* MOV imm,CH */
20893 case 0xB6: /* MOV imm,DH */
20894 case 0xB7: /* MOV imm,BH */
20895 if (haveF2orF3(pfx
)) goto decode_failure
;
20896 d64
= getUChar(delta
);
20898 putIRegRexB(1, pfx
, opc
-0xB0, mkU8(d64
));
20899 DIP("movb $%lld,%s\n", d64
, nameIRegRexB(1,pfx
,opc
-0xB0));
20902 case 0xB8: /* MOV imm,eAX */
20903 case 0xB9: /* MOV imm,eCX */
20904 case 0xBA: /* MOV imm,eDX */
20905 case 0xBB: /* MOV imm,eBX */
20906 case 0xBC: /* MOV imm,eSP */
20907 case 0xBD: /* MOV imm,eBP */
20908 case 0xBE: /* MOV imm,eSI */
20909 case 0xBF: /* MOV imm,eDI */
20910 /* This is the one-and-only place where 64-bit literals are
20911 allowed in the instruction stream. */
20912 if (haveF2orF3(pfx
)) goto decode_failure
;
20914 d64
= getDisp64(delta
);
20916 putIRegRexB(8, pfx
, opc
-0xB8, mkU64(d64
));
20917 DIP("movabsq $%lld,%s\n", (Long
)d64
,
20918 nameIRegRexB(8,pfx
,opc
-0xB8));
20920 d64
= getSDisp(imin(4,sz
),delta
);
20921 delta
+= imin(4,sz
);
20922 putIRegRexB(sz
, pfx
, opc
-0xB8,
20923 mkU(szToITy(sz
), d64
& mkSizeMask(sz
)));
20924 DIP("mov%c $%lld,%s\n", nameISize(sz
),
20926 nameIRegRexB(sz
,pfx
,opc
-0xB8));
20930 case 0xC0: { /* Grp2 Ib,Eb */
20931 Bool decode_OK
= True
;
20932 if (haveF2orF3(pfx
)) goto decode_failure
;
20933 modrm
= getUChar(delta
);
20934 am_sz
= lengthAMode(pfx
,delta
);
20936 d64
= getUChar(delta
+ am_sz
);
20938 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
20939 mkU8(d64
& 0xFF), NULL
, &decode_OK
);
20940 if (!decode_OK
) goto decode_failure
;
20944 case 0xC1: { /* Grp2 Ib,Ev */
20945 Bool decode_OK
= True
;
20946 if (haveF2orF3(pfx
)) goto decode_failure
;
20947 modrm
= getUChar(delta
);
20948 am_sz
= lengthAMode(pfx
,delta
);
20950 d64
= getUChar(delta
+ am_sz
);
20951 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
20952 mkU8(d64
& 0xFF), NULL
, &decode_OK
);
20953 if (!decode_OK
) goto decode_failure
;
20957 case 0xC2: /* RET imm16 */
20958 if (have66orF3(pfx
)) goto decode_failure
;
20959 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
20960 d64
= getUDisp16(delta
);
20962 dis_ret(dres
, vbi
, d64
);
20963 DIP("ret $%lld\n", d64
);
20966 case 0xC3: /* RET */
20967 if (have66(pfx
)) goto decode_failure
;
20968 /* F3 is acceptable on AMD. */
20969 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
20970 dis_ret(dres
, vbi
, 0);
20971 DIP(haveF3(pfx
) ? "rep ; ret\n" : "ret\n");
20974 case 0xC6: /* C6 /0 = MOV Ib,Eb */
20976 goto maybe_do_Mov_I_E
;
20977 case 0xC7: /* C7 /0 = MOV Iv,Ev */
20978 goto maybe_do_Mov_I_E
;
20980 modrm
= getUChar(delta
);
20981 if (gregLO3ofRM(modrm
) == 0) {
20982 if (epartIsReg(modrm
)) {
20983 /* Neither F2 nor F3 are allowable. */
20984 if (haveF2orF3(pfx
)) goto decode_failure
;
20985 delta
++; /* mod/rm byte */
20986 d64
= getSDisp(imin(4,sz
),delta
);
20987 delta
+= imin(4,sz
);
20988 putIRegE(sz
, pfx
, modrm
,
20989 mkU(szToITy(sz
), d64
& mkSizeMask(sz
)));
20990 DIP("mov%c $%lld, %s\n", nameISize(sz
),
20992 nameIRegE(sz
,pfx
,modrm
));
20994 if (haveF2(pfx
)) goto decode_failure
;
20995 /* F3(XRELEASE) is allowable here */
20996 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
,
20997 /*xtra*/imin(4,sz
) );
20999 d64
= getSDisp(imin(4,sz
),delta
);
21000 delta
+= imin(4,sz
);
21001 storeLE(mkexpr(addr
),
21002 mkU(szToITy(sz
), d64
& mkSizeMask(sz
)));
21003 DIP("mov%c $%lld, %s\n", nameISize(sz
), (Long
)d64
, dis_buf
);
21007 /* BEGIN HACKY SUPPORT FOR xbegin */
21008 if (opc
== 0xC7 && modrm
== 0xF8 && !have66orF2orF3(pfx
) && sz
== 4
21009 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
21010 delta
++; /* mod/rm byte */
21011 d64
= getSDisp(4,delta
);
21013 guest_RIP_next_mustcheck
= True
;
21014 guest_RIP_next_assumed
= guest_RIP_bbstart
+ delta
;
21015 Addr64 failAddr
= guest_RIP_bbstart
+ delta
+ d64
;
21016 /* EAX contains the failure status code. Bit 3 is "Set if an
21017 internal buffer overflowed", which seems like the
21018 least-bogus choice we can make here. */
21019 putIRegRAX(4, mkU32(1<<3));
21020 /* And jump to the fail address. */
21021 jmp_lit(dres
, Ijk_Boring
, failAddr
);
21022 vassert(dres
->whatNext
== Dis_StopHere
);
21023 DIP("xbeginq 0x%llx\n", failAddr
);
21026 /* END HACKY SUPPORT FOR xbegin */
21027 /* BEGIN HACKY SUPPORT FOR xabort */
21028 if (opc
== 0xC6 && modrm
== 0xF8 && !have66orF2orF3(pfx
) && sz
== 1
21029 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
21030 delta
++; /* mod/rm byte */
21031 abyte
= getUChar(delta
); delta
++;
21032 /* There is never a real transaction in progress, so do nothing. */
21033 DIP("xabort $%d", (Int
)abyte
);
21036 /* END HACKY SUPPORT FOR xabort */
21037 goto decode_failure
;
21039 case 0xC8: /* ENTER */
21040 /* Same comments re operand size as for LEAVE below apply.
21041 Also, only handles the case "enter $imm16, $0"; other cases
21042 for the second operand (nesting depth) are not handled. */
21044 goto decode_failure
;
21045 d64
= getUDisp16(delta
);
21047 vassert(d64
>= 0 && d64
<= 0xFFFF);
21048 if (getUChar(delta
) != 0)
21049 goto decode_failure
;
21051 /* Intel docs seem to suggest:
21057 t1
= newTemp(Ity_I64
);
21058 assign(t1
, getIReg64(R_RBP
));
21059 t2
= newTemp(Ity_I64
);
21060 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
21061 putIReg64(R_RSP
, mkexpr(t2
));
21062 storeLE(mkexpr(t2
), mkexpr(t1
));
21063 putIReg64(R_RBP
, mkexpr(t2
));
21065 putIReg64(R_RSP
, binop(Iop_Sub64
, mkexpr(t2
), mkU64(d64
)));
21067 DIP("enter $%u, $0\n", (UInt
)d64
);
21070 case 0xC9: /* LEAVE */
21071 /* In 64-bit mode this defaults to a 64-bit operand size. There
21072 is no way to encode a 32-bit variant. Hence sz==4 but we do
21075 goto decode_failure
;
21076 t1
= newTemp(Ity_I64
);
21077 t2
= newTemp(Ity_I64
);
21078 assign(t1
, getIReg64(R_RBP
));
21079 /* First PUT RSP looks redundant, but need it because RSP must
21080 always be up-to-date for Memcheck to work... */
21081 putIReg64(R_RSP
, mkexpr(t1
));
21082 assign(t2
, loadLE(Ity_I64
,mkexpr(t1
)));
21083 putIReg64(R_RBP
, mkexpr(t2
));
21084 putIReg64(R_RSP
, binop(Iop_Add64
, mkexpr(t1
), mkU64(8)) );
21088 case 0xCC: /* INT 3 */
21089 jmp_lit(dres
, Ijk_SigTRAP
, guest_RIP_bbstart
+ delta
);
21090 vassert(dres
->whatNext
== Dis_StopHere
);
21094 case 0xCD: /* INT imm8 */
21095 d64
= getUChar(delta
); delta
++;
21097 /* Handle int $0xD2 (Solaris fasttrap syscalls). */
21099 jmp_lit(dres
, Ijk_Sys_int210
, guest_RIP_bbstart
+ delta
);
21100 vassert(dres
->whatNext
== Dis_StopHere
);
21101 DIP("int $0xD2\n");
21104 goto decode_failure
;
21106 case 0xCF: /* IRET */
21107 /* Note, this is an extremely kludgey and limited implementation of iret
21108 based on the extremely kludgey and limited implementation of iret for x86
21109 popq %RIP; popl %CS; popq %RFLAGS; popq %RSP; popl %SS
21110 %CS and %SS are ignored */
21111 if (sz
!= 8 || have66orF2orF3(pfx
)) goto decode_failure
;
21113 t1
= newTemp(Ity_I64
); /* RSP */
21114 t2
= newTemp(Ity_I64
); /* new RIP */
21115 /* t3 = newTemp(Ity_I32); new CS */
21116 t4
= newTemp(Ity_I64
); /* new RFLAGS */
21117 t5
= newTemp(Ity_I64
); /* new RSP */
21118 /* t6 = newTemp(Ity_I32); new SS */
21120 assign(t1
, getIReg64(R_RSP
));
21121 assign(t2
, loadLE(Ity_I64
, binop(Iop_Add64
,mkexpr(t1
),mkU64(0))));
21122 /* assign(t3, loadLE(Ity_I32, binop(Iop_Add64,mkexpr(t1),mkU64(8)))); */
21123 assign(t4
, loadLE(Ity_I64
, binop(Iop_Add64
,mkexpr(t1
),mkU64(16))));
21124 assign(t5
, loadLE(Ity_I64
, binop(Iop_Add64
,mkexpr(t1
),mkU64(24))));
21125 /* assign(t6, loadLE(Ity_I32, binop(Iop_Add64,mkexpr(t1),mkU64(32)))); */
21128 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
21129 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
21130 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
21131 stmt( IRStmt_Put( OFFB_CC_DEP1
,
21134 mkU64( AMD64G_CC_MASK_C
| AMD64G_CC_MASK_P
21135 | AMD64G_CC_MASK_A
| AMD64G_CC_MASK_Z
21136 | AMD64G_CC_MASK_S
| AMD64G_CC_MASK_O
)
21141 /* Also need to set the D flag, which is held in bit 10 of t4.
21142 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
21148 binop(Iop_Shr64
, mkexpr(t4
), mkU8(10)),
21150 mkU64(0xFFFFFFFFFFFFFFFFULL
),
21154 /* And set the ID flag */
21160 binop(Iop_Shr64
, mkexpr(t4
), mkU8(21)),
21166 /* And set the AC flag too */
21172 binop(Iop_Shr64
, mkexpr(t4
), mkU8(18)),
21179 /* set new stack */
21180 putIReg64(R_RSP
, mkexpr(t5
));
21182 /* goto new RIP value */
21183 jmp_treg(dres
, Ijk_Ret
, t2
);
21184 DIP("iret (very kludgey)\n");
21187 case 0xD0: { /* Grp2 1,Eb */
21188 Bool decode_OK
= True
;
21189 if (haveF2orF3(pfx
)) goto decode_failure
;
21190 modrm
= getUChar(delta
);
21191 am_sz
= lengthAMode(pfx
,delta
);
21195 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
21196 mkU8(d64
), NULL
, &decode_OK
);
21197 if (!decode_OK
) goto decode_failure
;
21201 case 0xD1: { /* Grp2 1,Ev */
21202 Bool decode_OK
= True
;
21203 if (haveF2orF3(pfx
)) goto decode_failure
;
21204 modrm
= getUChar(delta
);
21205 am_sz
= lengthAMode(pfx
,delta
);
21208 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
21209 mkU8(d64
), NULL
, &decode_OK
);
21210 if (!decode_OK
) goto decode_failure
;
21214 case 0xD2: { /* Grp2 CL,Eb */
21215 Bool decode_OK
= True
;
21216 if (haveF2orF3(pfx
)) goto decode_failure
;
21217 modrm
= getUChar(delta
);
21218 am_sz
= lengthAMode(pfx
,delta
);
21221 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
21222 getIRegCL(), "%cl", &decode_OK
);
21223 if (!decode_OK
) goto decode_failure
;
21227 case 0xD3: { /* Grp2 CL,Ev */
21228 Bool decode_OK
= True
;
21229 if (haveF2orF3(pfx
)) goto decode_failure
;
21230 modrm
= getUChar(delta
);
21231 am_sz
= lengthAMode(pfx
,delta
);
21233 delta
= dis_Grp2 ( vbi
, pfx
, delta
, modrm
, am_sz
, d_sz
, sz
,
21234 getIRegCL(), "%cl", &decode_OK
);
21235 if (!decode_OK
) goto decode_failure
;
21239 case 0xD8: /* X87 instructions */
21247 Bool redundantREXWok
= False
;
21249 if (haveF2orF3(pfx
))
21250 goto decode_failure
;
21252 /* kludge to tolerate redundant rex.w prefixes (should do this
21253 properly one day) */
21254 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */
21255 if ( (opc
== 0xD9 && getUChar(delta
+0) == 0xFA)/*fsqrt*/ )
21256 redundantREXWok
= True
;
21258 Bool size_OK
= False
;
21261 else if ( sz
== 8 )
21262 size_OK
= redundantREXWok
;
21263 else if ( sz
== 2 ) {
21264 int mod_rm
= getUChar(delta
+0);
21265 int reg
= gregLO3ofRM(mod_rm
);
21266 /* The HotSpot JVM uses these */
21267 if ( (opc
== 0xDD) && (reg
== 0 /* FLDL */ ||
21268 reg
== 4 /* FNSAVE */ ||
21269 reg
== 6 /* FRSTOR */ ) )
21272 /* AMD manual says 0x66 size override is ignored, except where
21273 it is meaningful */
21275 goto decode_failure
;
21277 Bool decode_OK
= False
;
21278 delta
= dis_FPU ( &decode_OK
, vbi
, pfx
, delta
);
21280 goto decode_failure
;
21285 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
21286 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
21287 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
21288 { /* The docs say this uses rCX as a count depending on the
21289 address size override, not the operand one. */
21290 IRExpr
* zbit
= NULL
;
21291 IRExpr
* count
= NULL
;
21292 IRExpr
* cond
= NULL
;
21293 const HChar
* xtra
= NULL
;
21295 if (have66orF2orF3(pfx
) || 1==getRexW(pfx
)) goto decode_failure
;
21296 /* So at this point we've rejected any variants which appear to
21297 be governed by the usual operand-size modifiers. Hence only
21298 the address size prefix can have an effect. It changes the
21299 size from 64 (default) to 32. */
21300 d64
= guest_RIP_bbstart
+delta
+1 + getSDisp8(delta
);
21302 if (haveASO(pfx
)) {
21303 /* 64to32 of 64-bit get is merely a get-put improvement
21305 putIReg32(R_RCX
, binop(Iop_Sub32
,
21306 unop(Iop_64to32
, getIReg64(R_RCX
)),
21309 putIReg64(R_RCX
, binop(Iop_Sub64
, getIReg64(R_RCX
), mkU64(1)));
21312 /* This is correct, both for 32- and 64-bit versions. If we're
21313 doing a 32-bit dec and the result is zero then the default
21314 zero extension rule will cause the upper 32 bits to be zero
21315 too. Hence a 64-bit check against zero is OK. */
21316 count
= getIReg64(R_RCX
);
21317 cond
= binop(Iop_CmpNE64
, count
, mkU64(0));
21324 zbit
= mk_amd64g_calculate_condition( AMD64CondZ
);
21325 cond
= mkAnd1(cond
, zbit
);
21329 zbit
= mk_amd64g_calculate_condition( AMD64CondNZ
);
21330 cond
= mkAnd1(cond
, zbit
);
21335 stmt( IRStmt_Exit(cond
, Ijk_Boring
, IRConst_U64(d64
), OFFB_RIP
) );
21337 DIP("loop%s%s 0x%llx\n", xtra
, haveASO(pfx
) ? "l" : "", (ULong
)d64
);
21342 /* JRCXZ or JECXZ, depending address size override. */
21343 if (have66orF2orF3(pfx
)) goto decode_failure
;
21344 d64
= (guest_RIP_bbstart
+delta
+1) + getSDisp8(delta
);
21346 if (haveASO(pfx
)) {
21348 stmt( IRStmt_Exit( binop(Iop_CmpEQ64
,
21349 unop(Iop_32Uto64
, getIReg32(R_RCX
)),
21355 DIP("jecxz 0x%llx\n", (ULong
)d64
);
21358 stmt( IRStmt_Exit( binop(Iop_CmpEQ64
,
21365 DIP("jrcxz 0x%llx\n", (ULong
)d64
);
21369 case 0xE4: /* IN imm8, AL */
21371 t1
= newTemp(Ity_I64
);
21372 abyte
= getUChar(delta
); delta
++;
21373 assign(t1
, mkU64( abyte
& 0xFF ));
21374 DIP("in%c $%d,%s\n", nameISize(sz
), (Int
)abyte
, nameIRegRAX(sz
));
21376 case 0xE5: /* IN imm8, eAX */
21377 if (!(sz
== 2 || sz
== 4)) goto decode_failure
;
21378 t1
= newTemp(Ity_I64
);
21379 abyte
= getUChar(delta
); delta
++;
21380 assign(t1
, mkU64( abyte
& 0xFF ));
21381 DIP("in%c $%d,%s\n", nameISize(sz
), (Int
)abyte
, nameIRegRAX(sz
));
21383 case 0xEC: /* IN %DX, AL */
21385 t1
= newTemp(Ity_I64
);
21386 assign(t1
, unop(Iop_16Uto64
, getIRegRDX(2)));
21387 DIP("in%c %s,%s\n", nameISize(sz
), nameIRegRDX(2),
21390 case 0xED: /* IN %DX, eAX */
21391 if (!(sz
== 2 || sz
== 4)) goto decode_failure
;
21392 t1
= newTemp(Ity_I64
);
21393 assign(t1
, unop(Iop_16Uto64
, getIRegRDX(2)));
21394 DIP("in%c %s,%s\n", nameISize(sz
), nameIRegRDX(2),
21398 /* At this point, sz indicates the width, and t1 is a 64-bit
21399 value giving port number. */
21401 if (haveF2orF3(pfx
)) goto decode_failure
;
21402 vassert(sz
== 1 || sz
== 2 || sz
== 4);
21404 t2
= newTemp(Ity_I64
);
21405 d
= unsafeIRDirty_1_N(
21408 "amd64g_dirtyhelper_IN",
21409 &amd64g_dirtyhelper_IN
,
21410 mkIRExprVec_2( mkexpr(t1
), mkU64(sz
) )
21412 /* do the call, dumping the result in t2. */
21413 stmt( IRStmt_Dirty(d
) );
21414 putIRegRAX(sz
, narrowTo( ty
, mkexpr(t2
) ) );
21418 case 0xE6: /* OUT AL, imm8 */
21420 t1
= newTemp(Ity_I64
);
21421 abyte
= getUChar(delta
); delta
++;
21422 assign( t1
, mkU64( abyte
& 0xFF ) );
21423 DIP("out%c %s,$%d\n", nameISize(sz
), nameIRegRAX(sz
), (Int
)abyte
);
21425 case 0xE7: /* OUT eAX, imm8 */
21426 if (!(sz
== 2 || sz
== 4)) goto decode_failure
;
21427 t1
= newTemp(Ity_I64
);
21428 abyte
= getUChar(delta
); delta
++;
21429 assign( t1
, mkU64( abyte
& 0xFF ) );
21430 DIP("out%c %s,$%d\n", nameISize(sz
), nameIRegRAX(sz
), (Int
)abyte
);
21432 case 0xEE: /* OUT AL, %DX */
21434 t1
= newTemp(Ity_I64
);
21435 assign( t1
, unop(Iop_16Uto64
, getIRegRDX(2)) );
21436 DIP("out%c %s,%s\n", nameISize(sz
), nameIRegRAX(sz
),
21439 case 0xEF: /* OUT eAX, %DX */
21440 if (!(sz
== 2 || sz
== 4)) goto decode_failure
;
21441 t1
= newTemp(Ity_I64
);
21442 assign( t1
, unop(Iop_16Uto64
, getIRegRDX(2)) );
21443 DIP("out%c %s,%s\n", nameISize(sz
), nameIRegRAX(sz
),
21447 /* At this point, sz indicates the width, and t1 is a 64-bit
21448 value giving port number. */
21450 if (haveF2orF3(pfx
)) goto decode_failure
;
21451 vassert(sz
== 1 || sz
== 2 || sz
== 4);
21453 d
= unsafeIRDirty_0_N(
21455 "amd64g_dirtyhelper_OUT",
21456 &amd64g_dirtyhelper_OUT
,
21457 mkIRExprVec_3( mkexpr(t1
),
21458 widenUto64( getIRegRAX(sz
) ),
21461 stmt( IRStmt_Dirty(d
) );
21465 case 0xE8: /* CALL J4 */
21466 if (haveF3(pfx
)) goto decode_failure
;
21467 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
21468 d64
= getSDisp32(delta
); delta
+= 4;
21469 d64
+= (guest_RIP_bbstart
+delta
);
21470 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */
21471 t1
= newTemp(Ity_I64
);
21472 assign(t1
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
21473 putIReg64(R_RSP
, mkexpr(t1
));
21474 storeLE( mkexpr(t1
), mkU64(guest_RIP_bbstart
+delta
));
21475 t2
= newTemp(Ity_I64
);
21476 assign(t2
, mkU64((Addr64
)d64
));
21477 make_redzone_AbiHint(vbi
, t1
, t2
/*nia*/, "call-d32");
21478 jmp_lit(dres
, Ijk_Call
, d64
);
21479 vassert(dres
->whatNext
== Dis_StopHere
);
21480 DIP("call 0x%llx\n", (ULong
)d64
);
21483 case 0xE9: /* Jv (jump, 16/32 offset) */
21484 if (haveF3(pfx
)) goto decode_failure
;
21485 sz
= 4; /* Prefixes that change operand size are ignored for this
21486 instruction. Operand size is forced to 32bit. */
21487 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
21488 d64
= (guest_RIP_bbstart
+delta
+sz
) + getSDisp(sz
,delta
);
21490 jmp_lit(dres
, Ijk_Boring
, d64
);
21491 vassert(dres
->whatNext
== Dis_StopHere
);
21492 DIP("jmp 0x%llx\n", (ULong
)d64
);
21495 case 0xEB: /* Jb (jump, byte offset) */
21496 if (haveF3(pfx
)) goto decode_failure
;
21497 /* Prefixes that change operand size are ignored for this instruction. */
21498 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
21499 d64
= (guest_RIP_bbstart
+delta
+1) + getSDisp8(delta
);
21501 jmp_lit(dres
, Ijk_Boring
, d64
);
21502 vassert(dres
->whatNext
== Dis_StopHere
);
21503 DIP("jmp-8 0x%llx\n", (ULong
)d64
);
21506 case 0xF5: /* CMC */
21507 case 0xF8: /* CLC */
21508 case 0xF9: /* STC */
21509 t1
= newTemp(Ity_I64
);
21510 t2
= newTemp(Ity_I64
);
21511 assign( t1
, mk_amd64g_calculate_rflags_all() );
21514 assign( t2
, binop(Iop_Xor64
, mkexpr(t1
),
21515 mkU64(AMD64G_CC_MASK_C
)));
21519 assign( t2
, binop(Iop_And64
, mkexpr(t1
),
21520 mkU64(~AMD64G_CC_MASK_C
)));
21524 assign( t2
, binop(Iop_Or64
, mkexpr(t1
),
21525 mkU64(AMD64G_CC_MASK_C
)));
21529 vpanic("disInstr(x64)(cmc/clc/stc)");
21531 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
21532 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
21533 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(t2
) ));
21534 /* Set NDEP even though it isn't used. This makes redundant-PUT
21535 elimination of previous stores to this field work better. */
21536 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
21539 case 0xF6: { /* Grp3 Eb */
21540 Bool decode_OK
= True
;
21541 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21542 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
21543 delta
= dis_Grp3 ( vbi
, pfx
, 1, delta
, &decode_OK
);
21544 if (!decode_OK
) goto decode_failure
;
21548 case 0xF7: { /* Grp3 Ev */
21549 Bool decode_OK
= True
;
21550 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21551 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
21552 delta
= dis_Grp3 ( vbi
, pfx
, sz
, delta
, &decode_OK
);
21553 if (!decode_OK
) goto decode_failure
;
21557 case 0xFC: /* CLD */
21558 if (haveF2orF3(pfx
)) goto decode_failure
;
21559 stmt( IRStmt_Put( OFFB_DFLAG
, mkU64(1)) );
21563 case 0xFD: /* STD */
21564 if (haveF2orF3(pfx
)) goto decode_failure
;
21565 stmt( IRStmt_Put( OFFB_DFLAG
, mkU64(-1ULL)) );
21569 case 0xFE: { /* Grp4 Eb */
21570 Bool decode_OK
= True
;
21571 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21572 /* We now let dis_Grp4 itself decide if F2 and/or F3 are valid */
21573 delta
= dis_Grp4 ( vbi
, pfx
, delta
, &decode_OK
);
21574 if (!decode_OK
) goto decode_failure
;
21578 case 0xFF: { /* Grp5 Ev */
21579 Bool decode_OK
= True
;
21580 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21581 /* We now let dis_Grp5 itself decide if F2 and/or F3 are valid */
21582 delta
= dis_Grp5 ( vbi
, pfx
, sz
, delta
, dres
, &decode_OK
);
21583 if (!decode_OK
) goto decode_failure
;
21593 return deltaIN
; /* fail */
21597 /*------------------------------------------------------------*/
21599 /*--- Top-level post-escape decoders: dis_ESC_0F ---*/
21601 /*------------------------------------------------------------*/
21603 static IRTemp
math_BSWAP ( IRTemp t1
, IRType ty
)
21605 IRTemp t2
= newTemp(ty
);
21606 if (ty
== Ity_I64
) {
21607 IRTemp m8
= newTemp(Ity_I64
);
21608 IRTemp s8
= newTemp(Ity_I64
);
21609 IRTemp m16
= newTemp(Ity_I64
);
21610 IRTemp s16
= newTemp(Ity_I64
);
21611 IRTemp m32
= newTemp(Ity_I64
);
21612 assign( m8
, mkU64(0xFF00FF00FF00FF00ULL
) );
21616 binop(Iop_And64
,mkexpr(t1
),mkexpr(m8
)),
21619 binop(Iop_Shl64
,mkexpr(t1
),mkU8(8)),
21624 assign( m16
, mkU64(0xFFFF0000FFFF0000ULL
) );
21628 binop(Iop_And64
,mkexpr(s8
),mkexpr(m16
)),
21631 binop(Iop_Shl64
,mkexpr(s8
),mkU8(16)),
21636 assign( m32
, mkU64(0xFFFFFFFF00000000ULL
) );
21640 binop(Iop_And64
,mkexpr(s16
),mkexpr(m32
)),
21643 binop(Iop_Shl64
,mkexpr(s16
),mkU8(32)),
21649 if (ty
== Ity_I32
) {
21653 binop(Iop_Shl32
, mkexpr(t1
), mkU8(24)),
21656 binop(Iop_And32
, binop(Iop_Shl32
, mkexpr(t1
), mkU8(8)),
21657 mkU32(0x00FF0000)),
21659 binop(Iop_And32
, binop(Iop_Shr32
, mkexpr(t1
), mkU8(8)),
21660 mkU32(0x0000FF00)),
21661 binop(Iop_And32
, binop(Iop_Shr32
, mkexpr(t1
), mkU8(24)),
21662 mkU32(0x000000FF) )
21667 if (ty
== Ity_I16
) {
21670 binop(Iop_Shl16
, mkexpr(t1
), mkU8(8)),
21671 binop(Iop_Shr16
, mkexpr(t1
), mkU8(8)) ));
21676 return IRTemp_INVALID
;
21680 __attribute__((noinline
))
21683 /*MB_OUT*/DisResult
* dres
,
21684 /*MB_OUT*/Bool
* expect_CAS
,
21685 const VexArchInfo
* archinfo
,
21686 const VexAbiInfo
* vbi
,
21687 Prefix pfx
, Int sz
, Long deltaIN
21691 IRTemp addr
= IRTemp_INVALID
;
21692 IRTemp t1
= IRTemp_INVALID
;
21693 IRTemp t2
= IRTemp_INVALID
;
21699 /* In the first switch, look for ordinary integer insns. */
21700 Long delta
= deltaIN
;
21701 UChar opc
= getUChar(delta
);
21703 switch (opc
) { /* first switch */
21707 modrm
= getUChar(delta
);
21708 /* 0F 01 /0 -- SGDT */
21709 /* 0F 01 /1 -- SIDT */
21710 if (!epartIsReg(modrm
)
21711 && (gregLO3ofRM(modrm
) == 0 || gregLO3ofRM(modrm
) == 1)) {
21712 /* This is really revolting, but ... since each processor
21713 (core) only has one IDT and one GDT, just let the guest
21714 see it (pass-through semantics). I can't see any way to
21715 construct a faked-up value, so don't bother to try. */
21716 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
21718 switch (gregLO3ofRM(modrm
)) {
21719 case 0: DIP("sgdt %s\n", dis_buf
); break;
21720 case 1: DIP("sidt %s\n", dis_buf
); break;
21721 default: vassert(0); /*NOTREACHED*/
21723 IRDirty
* d
= unsafeIRDirty_0_N (
21725 "amd64g_dirtyhelper_SxDT",
21726 &amd64g_dirtyhelper_SxDT
,
21727 mkIRExprVec_2( mkexpr(addr
),
21728 mkU64(gregLO3ofRM(modrm
)) )
21730 /* declare we're writing memory */
21731 d
->mFx
= Ifx_Write
;
21732 d
->mAddr
= mkexpr(addr
);
21734 stmt( IRStmt_Dirty(d
) );
21737 /* 0F 01 D0 = XGETBV */
21738 if (modrm
== 0xD0 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
21741 /* Fault (SEGV) if ECX isn't zero. Intel docs say #GP and I
21742 am not sure if that translates in to SEGV or to something
21743 else, in user space. */
21744 t1
= newTemp(Ity_I32
);
21745 assign( t1
, getIReg32(R_RCX
) );
21746 stmt( IRStmt_Exit(binop(Iop_CmpNE32
, mkexpr(t1
), mkU32(0)),
21748 IRConst_U64(guest_RIP_curr_instr
),
21751 putIRegRAX(4, mkU32(7));
21752 putIRegRDX(4, mkU32(0));
21755 /* BEGIN HACKY SUPPORT FOR xend */
21756 /* 0F 01 D5 = XEND */
21757 if (modrm
== 0xD5 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
21758 /* We are never in an transaction (xbegin immediately aborts).
21759 So this just always generates a General Protection Fault. */
21761 jmp_lit(dres
, Ijk_SigSEGV
, guest_RIP_bbstart
+ delta
);
21762 vassert(dres
->whatNext
== Dis_StopHere
);
21766 /* END HACKY SUPPORT FOR xend */
21767 /* BEGIN HACKY SUPPORT FOR xtest */
21768 /* 0F 01 D6 = XTEST */
21769 if (modrm
== 0xD6 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
21770 /* Sets ZF because there never is a transaction, and all
21771 CF, OF, SF, PF and AF are always cleared by xtest. */
21774 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
21775 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
21776 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkU64(AMD64G_CC_MASK_Z
) ));
21777 /* Set NDEP even though it isn't used. This makes redundant-PUT
21778 elimination of previous stores to this field work better. */
21779 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
21782 /* END HACKY SUPPORT FOR xtest */
21783 /* 0F 01 F9 = RDTSCP */
21784 if (modrm
== 0xF9 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_RDTSCP
)) {
21786 /* Uses dirty helper:
21787 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* )
21788 declared to wr rax, rcx, rdx
21790 const HChar
* fName
= "amd64g_dirtyhelper_RDTSCP";
21791 void* fAddr
= &amd64g_dirtyhelper_RDTSCP
;
21793 = unsafeIRDirty_0_N ( 0/*regparms*/,
21794 fName
, fAddr
, mkIRExprVec_1(IRExpr_GSPTR()) );
21795 /* declare guest state effects */
21797 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
21798 d
->fxState
[0].fx
= Ifx_Write
;
21799 d
->fxState
[0].offset
= OFFB_RAX
;
21800 d
->fxState
[0].size
= 8;
21801 d
->fxState
[1].fx
= Ifx_Write
;
21802 d
->fxState
[1].offset
= OFFB_RCX
;
21803 d
->fxState
[1].size
= 8;
21804 d
->fxState
[2].fx
= Ifx_Write
;
21805 d
->fxState
[2].offset
= OFFB_RDX
;
21806 d
->fxState
[2].size
= 8;
21807 /* execute the dirty call, side-effecting guest state */
21808 stmt( IRStmt_Dirty(d
) );
21809 /* RDTSCP is a serialising insn. So, just in case someone is
21810 using it as a memory fence ... */
21811 stmt( IRStmt_MBE(Imbe_Fence
) );
21815 /* else decode failed */
21819 case 0x05: /* SYSCALL */
21820 guest_RIP_next_mustcheck
= True
;
/* The address of the following insn: AMD64 SYSCALL saves it in RCX
   so the kernel's SYSRET can come back here. */
21821 guest_RIP_next_assumed
= guest_RIP_bbstart
+ delta
;
21822 putIReg64( R_RCX
, mkU64(guest_RIP_next_assumed
) );
21823 /* It's important that all guest state is up-to-date
21824 at this point. So we declare an end-of-block here, which
21825 forces any cached guest state to be flushed. */
/* End the superblock with an Ijk_Sys_syscall jump-kind so the outer
   machinery performs the system call before resuming. */
21826 jmp_lit(dres
, Ijk_Sys_syscall
, guest_RIP_next_assumed
);
21827 vassert(dres
->whatNext
== Dis_StopHere
);
21831 case 0x0B: /* UD2 */
/* Make the guest RIP point at the UD2 itself, then end the block with
   Ijk_NoDecode; the outer layer turns that into a SIGILL-style event. */
21832 stmt( IRStmt_Put( OFFB_RIP
, mkU64(guest_RIP_curr_instr
) ) );
21833 jmp_lit(dres
, Ijk_NoDecode
, guest_RIP_curr_instr
);
21834 vassert(dres
->whatNext
== Dis_StopHere
);
21838 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
21839 /* 0F 0D /1 -- prefetchw mem8 */
21840 if (have66orF2orF3(pfx
)) goto decode_failure
;
21841 modrm
= getUChar(delta
);
/* Register forms are invalid; only /0 (prefetch) and /1 (prefetchw)
   reg-field selectors are accepted. */
21842 if (epartIsReg(modrm
)) goto decode_failure
;
21843 if (gregLO3ofRM(modrm
) != 0 && gregLO3ofRM(modrm
) != 1)
21844 goto decode_failure
;
/* Decode the address purely for its length / disassembly text; the
   prefetch itself generates no IR side effects here. */
21845 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
21847 switch (gregLO3ofRM(modrm
)) {
21848 case 0: DIP("prefetch %s\n", dis_buf
); break;
21849 case 1: DIP("prefetchw %s\n", dis_buf
); break;
21850 default: vassert(0); /*NOTREACHED*/
21859 // Intel CET instructions can have any prefixes before NOPs
21860 // and can use any ModRM, SIB and disp
/* Treat as a multi-byte NOP: consume the ModRM (and, for the memory
   form, the full addressing mode) without emitting any IR. */
21861 modrm
= getUChar(delta
);
21862 if (epartIsReg(modrm
)) {
21864 DIP("nop%c\n", nameISize(sz
));
/* Memory form: decode the address only to advance delta correctly. */
21866 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
21868 DIP("nop%c %s\n", nameISize(sz
), dis_buf
);
21872 case 0x31: { /* RDTSC */
/* Result of the dirty helper: the full 64-bit TSC value. */
21873 IRTemp val
= newTemp(Ity_I64
);
21874 IRExpr
** args
= mkIRExprVec_0();
21875 IRDirty
* d
= unsafeIRDirty_1_N (
21878 "amd64g_dirtyhelper_RDTSC",
21879 &amd64g_dirtyhelper_RDTSC
,
21882 if (have66orF2orF3(pfx
)) goto decode_failure
;
21883 /* execute the dirty call, dumping the result in val. */
21884 stmt( IRStmt_Dirty(d
) );
/* Split the 64-bit counter: high half -> EDX, low half -> EAX
   (32-bit puts, which zero the upper halves of RDX/RAX). */
21885 putIRegRDX(4, unop(Iop_64HIto32
, mkexpr(val
)));
21886 putIRegRAX(4, unop(Iop_64to32
, mkexpr(val
)));
21893 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
21894 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
21895 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
21896 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
21897 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
21898 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
21899 case 0x48: /* CMOVSb (cmov negative) */
21900 case 0x49: /* CMOVSb (cmov not negative) */
21901 case 0x4A: /* CMOVP (cmov parity even) */
21902 case 0x4B: /* CMOVNP (cmov parity odd) */
21903 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
21904 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
21905 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
21906 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
21907 if (haveF2orF3(pfx
)) goto decode_failure
;
/* The condition code is encoded directly in the opcode low nibble
   (opc - 0x40 maps onto AMD64Condcode). */
21908 delta
= dis_cmov_E_G(vbi
, pfx
, sz
, (AMD64Condcode
)(opc
- 0x40), delta
);
21913 case 0x82: /* JBb/JNAEb (jump below) */
21914 case 0x83: /* JNBb/JAEb (jump not below) */
21915 case 0x84: /* JZb/JEb (jump zero) */
21916 case 0x85: /* JNZb/JNEb (jump not zero) */
21917 case 0x86: /* JBEb/JNAb (jump below or equal) */
21918 case 0x87: /* JNBEb/JAb (jump not below or equal) */
21919 case 0x88: /* JSb (jump negative) */
21920 case 0x89: /* JSb (jump not negative) */
21921 case 0x8A: /* JP (jump parity even) */
21922 case 0x8B: /* JNP/JPO (jump parity odd) */
21923 case 0x8C: /* JLb/JNGEb (jump less) */
21924 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
21925 case 0x8E: /* JLEb/JNGb (jump less or equal) */
21926 case 0x8F: { /* JGb/JNLEb (jump greater) */
21928 const HChar
* comment
= "";
/* F3 is rejected; F2 is tolerated as the MPX "bnd" prefix (hint only). */
21929 if (haveF3(pfx
)) goto decode_failure
;
21930 if (haveF2(pfx
)) DIP("bnd ; "); /* MPX bnd prefix. */
/* Target = address of the next insn (+4 for the rel32 field) plus the
   sign-extended 32-bit displacement. */
21931 jmpDelta
= getSDisp32(delta
);
21932 d64
= (guest_RIP_bbstart
+delta
+4) + jmpDelta
;
21934 /* End the block at this point. */
/* Condition code comes from the opcode itself (opc - 0x80). */
21935 jcc_01( dres
, (AMD64Condcode
)(opc
- 0x80),
21936 guest_RIP_bbstart
+delta
, d64
);
21937 vassert(dres
->whatNext
== Dis_StopHere
);
21938 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc
- 0x80), (ULong
)d64
,
21945 case 0x92: /* set-Bb/set-NAEb (set if below) */
21946 case 0x93: /* set-NBb/set-AEb (set if not below) */
21947 case 0x94: /* set-Zb/set-Eb (set if zero) */
21948 case 0x95: /* set-NZb/set-NEb (set if not zero) */
21949 case 0x96: /* set-BEb/set-NAb (set if below or equal) */
21950 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
21951 case 0x98: /* set-Sb (set if negative) */
21952 case 0x99: /* set-Sb (set if not negative) */
21953 case 0x9A: /* set-P (set if parity even) */
21954 case 0x9B: /* set-NP (set if parity odd) */
21955 case 0x9C: /* set-Lb/set-NGEb (set if less) */
21956 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
21957 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
21958 case 0x9F: /* set-Gb/set-NLEb (set if greater) */
21959 if (haveF2orF3(pfx
)) goto decode_failure
;
/* t1 = 0 or 1, as an 8-bit value, from the condition (opc - 0x90). */
21960 t1
= newTemp(Ity_I8
);
21961 assign( t1
, unop(Iop_1Uto8
,mk_amd64g_calculate_condition(opc
-0x90)) );
21962 modrm
= getUChar(delta
);
/* Destination is either a byte register ... */
21963 if (epartIsReg(modrm
)) {
21965 putIRegE(1, pfx
, modrm
, mkexpr(t1
));
21966 DIP("set%s %s\n", name_AMD64Condcode(opc
-0x90),
21967 nameIRegE(1,pfx
,modrm
));
/* ... or a byte in memory. */
21969 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
21971 storeLE( mkexpr(addr
), mkexpr(t1
) );
21972 DIP("set%s %s\n", name_AMD64Condcode(opc
-0x90), dis_buf
);
21977 case 0x1B: { /* Future MPX instructions, currently NOPs.
21978 BNDMK b, m F3 0F 1B
21979 BNDCL b, r/m F3 0F 1A
21980 BNDCU b, r/m F2 0F 1A
21981 BNDCN b, r/m F2 0F 1B
21982 BNDMOV b, b/m 66 0F 1A
21983 BNDMOV b/m, b 66 0F 1B
21984 BNDLDX b, mib 0F 1A
21985 BNDSTX mib, b 0F 1B */
21987 /* All instructions have two operands. One operand is always the
21988 bnd register number (bnd0-bnd3, other register numbers are
21989 ignored when MPX isn't enabled, but should generate an
21990 exception if MPX is enabled) given by gregOfRexRM. The other
21991 operand is either a ModRM:reg, ModRM:r/m or a SIB encoded
21992 address, all of which can be decoded by using either
21993 eregOfRexRM or disAMode. */
21995 modrm
= getUChar(delta
);
21996 int bnd
= gregOfRexRM(pfx
,modrm
);
21998 if (epartIsReg(modrm
)) {
21999 oper
= nameIReg64 (eregOfRexRM(pfx
,modrm
));
22002 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
22007 if (haveF3no66noF2 (pfx
)) {
22009 DIP ("bndmk %s, %%bnd%d\n", oper
, bnd
);
22010 } else /* opc == 0x1A */ {
22011 DIP ("bndcl %s, %%bnd%d\n", oper
, bnd
);
22013 } else if (haveF2no66noF3 (pfx
)) {
22015 DIP ("bndcu %s, %%bnd%d\n", oper
, bnd
);
22016 } else /* opc == 0x1B */ {
22017 DIP ("bndcn %s, %%bnd%d\n", oper
, bnd
);
22019 } else if (have66noF2noF3 (pfx
)) {
22021 DIP ("bndmov %s, %%bnd%d\n", oper
, bnd
);
22022 } else /* opc == 0x1B */ {
22023 DIP ("bndmov %%bnd%d, %s\n", bnd
, oper
);
22025 } else if (haveNo66noF2noF3 (pfx
)) {
22027 DIP ("bndldx %s, %%bnd%d\n", oper
, bnd
);
22028 } else /* opc == 0x1B */ {
22029 DIP ("bndstx %%bnd%d, %s\n", bnd
, oper
);
22031 } else goto decode_failure
;
22036 case 0xA2: { /* CPUID */
22037 /* Uses dirty helper:
22038 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
22039 declared to mod rax, wr rbx, rcx, rdx
22042 const HChar
* fName
= NULL
;
22043 void* fAddr
= NULL
;
22045 if (haveF2orF3(pfx
)) goto decode_failure
;
22047 /* This isn't entirely correct, CPUID should depend on the VEX
22048 capabilities, not on the underlying CPU. See bug #324882. */
22049 if ((archinfo
->hwcaps
& VEX_HWCAPS_AMD64_SSSE3
) &&
22050 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_CX16
) &&
22051 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX2
)) {
22052 fName
= "amd64g_dirtyhelper_CPUID_avx2";
22053 fAddr
= &amd64g_dirtyhelper_CPUID_avx2
;
22054 /* This is a Core-i7-4910-like machine */
22056 else if ((archinfo
->hwcaps
& VEX_HWCAPS_AMD64_SSSE3
) &&
22057 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_CX16
) &&
22058 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
22059 fName
= "amd64g_dirtyhelper_CPUID_avx_and_cx16";
22060 fAddr
= &amd64g_dirtyhelper_CPUID_avx_and_cx16
;
22061 /* This is a Core-i5-2300-like machine */
22063 else if ((archinfo
->hwcaps
& VEX_HWCAPS_AMD64_SSSE3
) &&
22064 (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_CX16
)) {
22065 fName
= "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
22066 fAddr
= &amd64g_dirtyhelper_CPUID_sse42_and_cx16
;
22067 /* This is a Core-i5-670-like machine */
22070 /* Give a CPUID for at least a baseline machine, SSE2
22071 only, and no CX16 */
22072 fName
= "amd64g_dirtyhelper_CPUID_baseline";
22073 fAddr
= &amd64g_dirtyhelper_CPUID_baseline
;
22076 vassert(fName
); vassert(fAddr
);
22077 IRExpr
** args
= NULL
;
22078 if (fAddr
== &amd64g_dirtyhelper_CPUID_avx2
22079 || fAddr
== &amd64g_dirtyhelper_CPUID_avx_and_cx16
) {
22080 Bool hasF16C
= (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_F16C
) != 0;
22081 Bool hasRDRAND
= (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_RDRAND
) != 0;
22082 Bool hasRDSEED
= (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_RDSEED
) != 0;
22083 args
= mkIRExprVec_4(IRExpr_GSPTR(),
22084 mkIRExpr_HWord(hasF16C
? 1 : 0),
22085 mkIRExpr_HWord(hasRDRAND
? 1 : 0),
22086 mkIRExpr_HWord(hasRDSEED
? 1 : 0));
22088 args
= mkIRExprVec_1(IRExpr_GSPTR());
22090 d
= unsafeIRDirty_0_N ( 0/*regparms*/, fName
, fAddr
, args
);
22092 /* Declare guest state effects. EAX, EBX, ECX and EDX are written. EAX
22093 is also read, hence is marked as Modified. ECX is sometimes also
22094 read, depending on the value in EAX; that much is obvious from
22095 inspection of the helper function.
22097 This is a bit of a problem: if we mark ECX as Modified -- hence, by
22098 implication, Read -- then we may get false positives from Memcheck in
22099 the case where ECX contains undefined bits, but the EAX value is such
22100 that the instruction wouldn't read ECX anyway. The obvious way out
22101 of this is to mark it as written only, but that means Memcheck will
22102 effectively ignore undefinedness in the incoming ECX value. That
22103 seems like a small loss to take to avoid false positives here,
22104 though. Fundamentally the problem exists because CPUID itself has
22105 conditional dataflow -- whether ECX is read depends on the value in
22106 EAX -- but the annotation mechanism for dirty helpers can't represent
22107 that conditionality.
22109 A fully-accurate solution might be to change the helpers so that the
22110 EAX and ECX values are passed as parameters. Then, for the ECX
22111 value, we can pass, effectively "if EAX is some value for which ECX
22112 is ignored { 0 } else { ECX }", and Memcheck will see and understand
22113 this conditionality. */
22115 vex_bzero(&d
->fxState
, sizeof(d
->fxState
));
22116 d
->fxState
[0].fx
= Ifx_Modify
;
22117 d
->fxState
[0].offset
= OFFB_RAX
;
22118 d
->fxState
[0].size
= 8;
22119 d
->fxState
[1].fx
= Ifx_Write
;
22120 d
->fxState
[1].offset
= OFFB_RBX
;
22121 d
->fxState
[1].size
= 8;
22122 d
->fxState
[2].fx
= Ifx_Write
; /* was: Ifx_Modify; */
22123 d
->fxState
[2].offset
= OFFB_RCX
;
22124 d
->fxState
[2].size
= 8;
22125 d
->fxState
[3].fx
= Ifx_Write
;
22126 d
->fxState
[3].offset
= OFFB_RDX
;
22127 d
->fxState
[3].size
= 8;
22128 /* Execute the dirty call, side-effecting guest state. */
22129 stmt( IRStmt_Dirty(d
) );
22130 /* CPUID is a serialising insn. So, just in case someone is
22131 using it as a memory fence ... */
22132 stmt( IRStmt_MBE(Imbe_Fence
) );
22137 case 0xA3: { /* BT Gv,Ev */
22138 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
/* BT only exists at 16/32/64-bit operand sizes. */
22140 if (sz
!= 8 && sz
!= 4 && sz
!= 2) goto decode_failure
;
/* BtOpNone: test the bit but do not modify it. */
22141 delta
= dis_bt_G_E ( vbi
, pfx
, sz
, delta
, BtOpNone
, &ok
);
22142 if (!ok
) goto decode_failure
;
22146 case 0xA4: /* SHLDv imm8,Gv,Ev */
22147 modrm
= getUChar(delta
);
22148 d64
= delta
+ lengthAMode(pfx
, delta
);
22149 vex_sprintf(dis_buf
, "$%d", (Int
)getUChar(d64
));
22150 delta
= dis_SHLRD_Gv_Ev (
22151 vbi
, pfx
, delta
, modrm
, sz
,
22152 mkU8(getUChar(d64
)), True
, /* literal */
22153 dis_buf
, True
/* left */ );
22156 case 0xA5: /* SHLDv %cl,Gv,Ev */
22157 modrm
= getUChar(delta
);
22158 delta
= dis_SHLRD_Gv_Ev (
22159 vbi
, pfx
, delta
, modrm
, sz
,
22160 getIRegCL(), False
, /* not literal */
22161 "%cl", True
/* left */ );
22164 case 0xAB: { /* BTS Gv,Ev */
22165 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22167 if (sz
!= 8 && sz
!= 4 && sz
!= 2) goto decode_failure
;
22168 delta
= dis_bt_G_E ( vbi
, pfx
, sz
, delta
, BtOpSet
, &ok
);
22169 if (!ok
) goto decode_failure
;
22173 case 0xAC: /* SHRDv imm8,Gv,Ev */
22174 modrm
= getUChar(delta
);
22175 d64
= delta
+ lengthAMode(pfx
, delta
);
22176 vex_sprintf(dis_buf
, "$%d", (Int
)getUChar(d64
));
22177 delta
= dis_SHLRD_Gv_Ev (
22178 vbi
, pfx
, delta
, modrm
, sz
,
22179 mkU8(getUChar(d64
)), True
, /* literal */
22180 dis_buf
, False
/* right */ );
22183 case 0xAD: /* SHRDv %cl,Gv,Ev */
22184 modrm
= getUChar(delta
);
22185 delta
= dis_SHLRD_Gv_Ev (
22186 vbi
, pfx
, delta
, modrm
, sz
,
22187 getIRegCL(), False
, /* not literal */
22188 "%cl", False
/* right */);
22191 case 0xAF: /* IMUL Ev, Gv */
22192 if (haveF2orF3(pfx
)) goto decode_failure
;
22193 delta
= dis_mul_E_G ( vbi
, pfx
, sz
, delta
);
22196 case 0xB0: { /* CMPXCHG Gb,Eb */
22198 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22199 delta
= dis_cmpxchg_G_E ( &ok
, vbi
, pfx
, 1, delta
);
22200 if (!ok
) goto decode_failure
;
22204 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
22206 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22207 if (sz
!= 2 && sz
!= 4 && sz
!= 8) goto decode_failure
;
22208 delta
= dis_cmpxchg_G_E ( &ok
, vbi
, pfx
, sz
, delta
);
22209 if (!ok
) goto decode_failure
;
22213 case 0xB3: { /* BTR Gv,Ev */
22214 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22216 if (sz
!= 8 && sz
!= 4 && sz
!= 2) goto decode_failure
;
22217 delta
= dis_bt_G_E ( vbi
, pfx
, sz
, delta
, BtOpReset
, &ok
);
22218 if (!ok
) goto decode_failure
;
22222 case 0xB6: /* MOVZXb Eb,Gv */
22223 if (haveF2orF3(pfx
)) goto decode_failure
;
22224 if (sz
!= 2 && sz
!= 4 && sz
!= 8)
22225 goto decode_failure
;
22226 delta
= dis_movx_E_G ( vbi
, pfx
, delta
, 1, sz
, False
);
22229 case 0xB7: /* MOVZXw Ew,Gv */
22230 if (haveF2orF3(pfx
)) goto decode_failure
;
22231 if (sz
!= 4 && sz
!= 8)
22232 goto decode_failure
;
22233 delta
= dis_movx_E_G ( vbi
, pfx
, delta
, 2, sz
, False
);
22236 case 0xBA: { /* Grp8 Ib,Ev */
22237 /* We let dis_Grp8_Imm decide whether F2 or F3 are allowable. */
22238 Bool decode_OK
= False
;
22239 modrm
= getUChar(delta
);
22240 am_sz
= lengthAMode(pfx
,delta
);
22241 d64
= getSDisp8(delta
+ am_sz
);
22242 delta
= dis_Grp8_Imm ( vbi
, pfx
, delta
, modrm
, am_sz
, sz
, d64
,
22245 goto decode_failure
;
22249 case 0xBB: { /* BTC Gv,Ev */
22250 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22252 if (sz
!= 8 && sz
!= 4 && sz
!= 2) goto decode_failure
;
22253 delta
= dis_bt_G_E ( vbi
, pfx
, sz
, delta
, BtOpComp
, &ok
);
22254 if (!ok
) goto decode_failure
;
22258 case 0xBC: /* BSF Gv,Ev */
22259 if (!haveF2orF3(pfx
)
22260 || (haveF3noF2(pfx
)
22261 && 0 == (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_BMI
))) {
22262 /* no-F2 no-F3 0F BC = BSF
22263 or F3 0F BC = REP; BSF on older CPUs. */
22264 delta
= dis_bs_E_G ( vbi
, pfx
, sz
, delta
, True
);
22267 /* Fall through, since F3 0F BC is TZCNT, and needs to
22268 be handled by dis_ESC_0F__SSE4. */
22271 case 0xBD: /* BSR Gv,Ev */
22272 if (!haveF2orF3(pfx
)
22273 || (haveF3noF2(pfx
)
22274 && 0 == (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_LZCNT
))) {
22275 /* no-F2 no-F3 0F BD = BSR
22276 or F3 0F BD = REP; BSR on older CPUs. */
22277 delta
= dis_bs_E_G ( vbi
, pfx
, sz
, delta
, False
);
22280 /* Fall through, since F3 0F BD is LZCNT, and needs to
22281 be handled by dis_ESC_0F__SSE4. */
22284 case 0xBE: /* MOVSXb Eb,Gv */
22285 if (haveF2orF3(pfx
)) goto decode_failure
;
22286 if (sz
!= 2 && sz
!= 4 && sz
!= 8)
22287 goto decode_failure
;
22288 delta
= dis_movx_E_G ( vbi
, pfx
, delta
, 1, sz
, True
);
22291 case 0xBF: /* MOVSXw Ew,Gv */
22292 if (haveF2orF3(pfx
)) goto decode_failure
;
22293 if (sz
!= 4 && sz
!= 8)
22294 goto decode_failure
;
22295 delta
= dis_movx_E_G ( vbi
, pfx
, delta
, 2, sz
, True
);
22298 case 0xC0: { /* XADD Gb,Eb */
22299 Bool decode_OK
= False
;
22300 delta
= dis_xadd_G_E ( &decode_OK
, vbi
, pfx
, 1, delta
);
22302 goto decode_failure
;
22306 case 0xC1: { /* XADD Gv,Ev */
22307 Bool decode_OK
= False
;
22308 delta
= dis_xadd_G_E ( &decode_OK
, vbi
, pfx
, sz
, delta
);
22310 goto decode_failure
;
22315 modrm
= getUChar(delta
);
22317 // Detecting valid CMPXCHG combinations is pretty complex.
22318 Bool isValidCMPXCHG
= gregLO3ofRM(modrm
) == 1;
22319 if (isValidCMPXCHG
) {
22320 if (have66(pfx
)) isValidCMPXCHG
= False
;
22321 if (sz
!= 4 && sz
!= 8) isValidCMPXCHG
= False
;
22322 if (sz
== 8 && !(archinfo
->hwcaps
& VEX_HWCAPS_AMD64_CX16
))
22323 isValidCMPXCHG
= False
;
22324 if (epartIsReg(modrm
)) isValidCMPXCHG
= False
;
22325 if (haveF2orF3(pfx
)) {
22326 /* Since the e-part is memory only, F2 or F3 (one or the
22327 other) is acceptable if LOCK is also present. But only
22329 if (sz
== 8) isValidCMPXCHG
= False
;
22330 if (haveF2andF3(pfx
) || !haveLOCK(pfx
)) isValidCMPXCHG
= False
;
22334 /* 0F C7 /1 (with qualifications) = CMPXCHG */
22335 if (isValidCMPXCHG
) {
22336 // Note that we've already read the modrm byte by this point, but we
22337 // haven't moved delta past it.
22338 IRType elemTy
= sz
==4 ? Ity_I32
: Ity_I64
;
22339 IRTemp expdHi
= newTemp(elemTy
);
22340 IRTemp expdLo
= newTemp(elemTy
);
22341 IRTemp dataHi
= newTemp(elemTy
);
22342 IRTemp dataLo
= newTemp(elemTy
);
22343 IRTemp oldHi
= newTemp(elemTy
);
22344 IRTemp oldLo
= newTemp(elemTy
);
22345 IRTemp flags_old
= newTemp(Ity_I64
);
22346 IRTemp flags_new
= newTemp(Ity_I64
);
22347 IRTemp success
= newTemp(Ity_I1
);
22348 IROp opOR
= sz
==4 ? Iop_Or32
: Iop_Or64
;
22349 IROp opXOR
= sz
==4 ? Iop_Xor32
: Iop_Xor64
;
22350 IROp opCasCmpEQ
= sz
==4 ? Iop_CasCmpEQ32
: Iop_CasCmpEQ64
;
22351 IRExpr
* zero
= sz
==4 ? mkU32(0) : mkU64(0);
22352 IRTemp expdHi64
= newTemp(Ity_I64
);
22353 IRTemp expdLo64
= newTemp(Ity_I64
);
22355 /* Translate this using a DCAS, even if there is no LOCK
22356 prefix. Life is too short to bother with generating two
22357 different translations for the with/without-LOCK-prefix
22359 *expect_CAS
= True
;
22361 /* Generate address */
22362 vassert(!epartIsReg(modrm
));
22363 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
22366 /* cmpxchg16b requires an alignment check. */
22368 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
22370 /* Get the expected and new values. */
22371 assign( expdHi64
, getIReg64(R_RDX
) );
22372 assign( expdLo64
, getIReg64(R_RAX
) );
22374 /* These are the correctly-sized expected and new values.
22375 However, we also get expdHi64/expdLo64 above as 64-bits
22376 regardless, because we will need them later in the 32-bit
22377 case (paradoxically). */
22378 assign( expdHi
, sz
==4 ? unop(Iop_64to32
, mkexpr(expdHi64
))
22379 : mkexpr(expdHi64
) );
22380 assign( expdLo
, sz
==4 ? unop(Iop_64to32
, mkexpr(expdLo64
))
22381 : mkexpr(expdLo64
) );
22382 assign( dataHi
, sz
==4 ? getIReg32(R_RCX
) : getIReg64(R_RCX
) );
22383 assign( dataLo
, sz
==4 ? getIReg32(R_RBX
) : getIReg64(R_RBX
) );
22387 mkIRCAS( oldHi
, oldLo
,
22388 Iend_LE
, mkexpr(addr
),
22389 mkexpr(expdHi
), mkexpr(expdLo
),
22390 mkexpr(dataHi
), mkexpr(dataLo
)
22393 /* success when oldHi:oldLo == expdHi:expdLo */
22397 binop(opXOR
, mkexpr(oldHi
), mkexpr(expdHi
)),
22398 binop(opXOR
, mkexpr(oldLo
), mkexpr(expdLo
))
22403 /* If the DCAS is successful, that is to say oldHi:oldLo ==
22404 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
22405 which is where they came from originally. Both the actual
22406 contents of these two regs, and any shadow values, are
22407 unchanged. If the DCAS fails then we're putting into
22408 RDX:RAX the value seen in memory. */
22409 /* Now of course there's a complication in the 32-bit case
22410 (bah!): if the DCAS succeeds, we need to leave RDX:RAX
22411 unchanged; but if we use the same scheme as in the 64-bit
22412 case, we get hit by the standard rule that a write to the
22413 bottom 32 bits of an integer register zeros the upper 32
22414 bits. And so the upper halves of RDX and RAX mysteriously
22415 become zero. So we have to stuff back in the original
22416 64-bit values which we previously stashed in
22417 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
22418 /* It's just _so_ much fun ... */
22420 IRExpr_ITE( mkexpr(success
),
22422 sz
== 4 ? unop(Iop_32Uto64
, mkexpr(oldHi
))
22426 IRExpr_ITE( mkexpr(success
),
22428 sz
== 4 ? unop(Iop_32Uto64
, mkexpr(oldLo
))
22432 /* Copy the success bit into the Z flag and leave the others
22434 assign( flags_old
, widenUto64(mk_amd64g_calculate_rflags_all()));
22438 binop(Iop_And64
, mkexpr(flags_old
),
22439 mkU64(~AMD64G_CC_MASK_Z
)),
22442 unop(Iop_1Uto64
, mkexpr(success
)), mkU64(1)),
22443 mkU8(AMD64G_CC_SHIFT_Z
)) ));
22445 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
22446 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(flags_new
) ));
22447 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
22448 /* Set NDEP even though it isn't used. This makes
22449 redundant-PUT elimination of previous stores to this field
22451 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
22453 /* Sheesh. Aren't you glad it was me and not you that had to
22454 write and validate all this grunge? */
22456 DIP("cmpxchg8b %s\n", dis_buf
);
22458 } // if (isValidCMPXCHG)
22460 /* 0F C7 /6 no-F2-or-F3 = RDRAND, 0F C7 /7 = RDSEED */
22461 int insn
= gregLO3ofRM(modrm
);
22462 if (((insn
== 6 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_RDRAND
))
22463 || (insn
== 7 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_RDSEED
)))
22464 && epartIsReg(modrm
) && haveNoF2noF3(pfx
)
22465 && (sz
== 8 || sz
== 4 || sz
== 2)) {
22467 delta
++; // move past modrm
22468 IRType ty
= szToITy(sz
);
22470 // Pull a first 32 bits of randomness, plus C flag, out of the host.
22471 IRTemp pairLO
= newTemp(Ity_I64
);
22473 if (insn
== 6) /* RDRAND */
22474 dLO
= unsafeIRDirty_1_N(pairLO
, 0/*regparms*/,
22475 "amd64g_dirtyhelper_RDRAND",
22476 &amd64g_dirtyhelper_RDRAND
, mkIRExprVec_0());
22478 dLO
= unsafeIRDirty_1_N(pairLO
, 0/*regparms*/,
22479 "amd64g_dirtyhelper_RDSEED",
22480 &amd64g_dirtyhelper_RDSEED
, mkIRExprVec_0());
22482 // There are no guest state or memory effects to declare for |dLO|.
22483 stmt( IRStmt_Dirty(dLO
) );
22485 IRTemp randsLO
= newTemp(Ity_I32
);
22486 assign(randsLO
, unop(Iop_64to32
, mkexpr(pairLO
)));
22487 IRTemp cLO
= newTemp(Ity_I64
);
22488 assign(cLO
, binop(Iop_Shr64
, mkexpr(pairLO
), mkU8(32)));
22490 // We'll assemble the final pairing in (cFinal, randsNearlyFinal).
22491 IRTemp randsNearlyFinal
= newTemp(Ity_I64
);
22492 IRTemp cFinal
= newTemp(Ity_I64
);
22494 if (ty
== Ity_I64
) {
22495 // Pull another 32 bits of randomness out of the host.
22496 IRTemp pairHI
= newTemp(Ity_I64
);
22498 if (insn
== 6) /* RDRAND */
22499 dHI
= unsafeIRDirty_1_N(pairHI
, 0/*regparms*/,
22500 "amd64g_dirtyhelper_RDRAND",
22501 &amd64g_dirtyhelper_RDRAND
, mkIRExprVec_0());
22503 dHI
= unsafeIRDirty_1_N(pairHI
, 0/*regparms*/,
22504 "amd64g_dirtyhelper_RDSEED",
22505 &amd64g_dirtyhelper_RDSEED
, mkIRExprVec_0());
22507 // There are no guest state or memory effects to declare for |dHI|.
22508 stmt( IRStmt_Dirty(dHI
) );
22510 IRTemp randsHI
= newTemp(Ity_I32
);
22511 assign(randsHI
, unop(Iop_64to32
, mkexpr(pairHI
)));
22512 IRTemp cHI
= newTemp(Ity_I64
);
22513 assign(cHI
, binop(Iop_Shr64
, mkexpr(pairHI
), mkU8(32)));
22514 assign(randsNearlyFinal
, binop(Iop_32HLto64
,
22515 mkexpr(randsHI
), mkexpr(randsLO
)));
22516 assign(cFinal
, binop(Iop_And64
,
22517 binop(Iop_And64
, mkexpr(cHI
), mkexpr(cLO
)),
22520 assign(randsNearlyFinal
, unop(Iop_32Uto64
, mkexpr(randsLO
)));
22521 assign(cFinal
, binop(Iop_And64
, mkexpr(cLO
), mkU64(1)));
22524 /* Now cFinal[0] is the final success/failure flag (cFinal[0] == 1
22525 means success). But there's another twist. If we failed then the
22526 returned value must be forced to zero. Otherwise we could have the
22527 situation, when sz==8, where one of the host calls failed but the
22528 other didn't. This would give cFinal[0] == 0 (correctly) but
22529 randsNearlyFinal not being zero, because it contains the 32 bit
22530 result of the non-failing call. */
22531 IRTemp randsFinal
= newTemp(Ity_I64
);
22534 mkexpr(randsNearlyFinal
),
22536 binop(Iop_Shl64
, mkexpr(cFinal
), mkU8(63)),
22540 // So, finally, update the guest state.
22541 putIRegE(sz
, pfx
, modrm
, narrowTo(ty
, mkexpr(randsFinal
)));
22543 // Set C=<success indication>, O,S,Z,A,P = 0. cFinal has already been
22544 // masked so only the lowest bit remains.
22545 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(AMD64G_CC_OP_COPY
) ));
22546 stmt( IRStmt_Put( OFFB_CC_DEP1
, mkexpr(cFinal
) ));
22547 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0) ));
22548 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
22551 DIP("rdrand %s", nameIRegE(sz
, pfx
, modrm
));
22553 DIP("rdseed %s", nameIRegE(sz
, pfx
, modrm
));
22559 goto decode_failure
;
22562 case 0xC8: /* BSWAP %eax */
22569 case 0xCF: /* BSWAP %edi */
22570 if (haveF2orF3(pfx
)) goto decode_failure
;
22571 /* According to the AMD64 docs, this insn can have size 4 or
/* 32-bit form: byte-swap the low 32 bits of the register selected by
   opc-0xC8 (plus REX.B); the 32-bit put zeroes the upper half. */
22574 t1
= newTemp(Ity_I32
);
22575 assign( t1
, getIRegRexB(4, pfx
, opc
-0xC8) );
22576 t2
= math_BSWAP( t1
, Ity_I32
);
22577 putIRegRexB(4, pfx
, opc
-0xC8, mkexpr(t2
));
22578 DIP("bswapl %s\n", nameIRegRexB(4, pfx
, opc
-0xC8));
/* 64-bit form: same pattern at full width. */
22582 t1
= newTemp(Ity_I64
);
22583 t2
= newTemp(Ity_I64
);
22584 assign( t1
, getIRegRexB(8, pfx
, opc
-0xC8) );
22585 t2
= math_BSWAP( t1
, Ity_I64
);
22586 putIRegRexB(8, pfx
, opc
-0xC8, mkexpr(t2
));
22587 DIP("bswapq %s\n", nameIRegRexB(8, pfx
, opc
-0xC8));
22590 goto decode_failure
;
22595 } /* first switch */
22598 /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */
22599 /* In the second switch, pick off MMX insns. */
22601 if (!have66orF2orF3(pfx
)) {
22602 /* So there's no SIMD prefix. */
22604 vassert(sz
== 4 || sz
== 8);
22606 switch (opc
) { /* second switch */
22610 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
22612 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
22613 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
22614 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
22615 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
22619 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
22622 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
22625 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
22629 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
22632 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
22635 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
22637 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
22638 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
22640 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
22644 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
22648 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
22650 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
22651 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
22652 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
22656 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
22660 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
22662 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
22663 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
22664 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
22665 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
22667 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
22671 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
22675 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
22677 Bool decode_OK
= False
;
22678 delta
= dis_MMX ( &decode_OK
, vbi
, pfx
, sz
, deltaIN
);
22681 goto decode_failure
;
22686 } /* second switch */
22690 /* A couple of MMX corner cases */
22691 if (opc
== 0x0E/* FEMMS */ || opc
== 0x77/* EMMS */) {
22693 goto decode_failure
;
22694 do_EMMS_preamble();
22699 /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */
22700 /* Perhaps it's an SSE or SSE2 instruction. We can try this
22701 without checking the guest hwcaps because SSE2 is a baseline
22702 facility in 64 bit mode. */
22704 Bool decode_OK
= False
;
22705 delta
= dis_ESC_0F__SSE2 ( &decode_OK
,
22706 archinfo
, vbi
, pfx
, sz
, deltaIN
, dres
);
22711 /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */
22712 /* Perhaps it's a SSE3 instruction. FIXME: check guest hwcaps
22715 Bool decode_OK
= False
;
22716 delta
= dis_ESC_0F__SSE3 ( &decode_OK
, vbi
, pfx
, sz
, deltaIN
);
22721 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22722 /* Perhaps it's a SSE4 instruction. FIXME: check guest hwcaps
22725 Bool decode_OK
= False
;
22726 delta
= dis_ESC_0F__SSE4 ( &decode_OK
,
22727 archinfo
, vbi
, pfx
, sz
, deltaIN
);
22733 return deltaIN
; /* fail */
22737 /*------------------------------------------------------------*/
22739 /*--- Top-level post-escape decoders: dis_ESC_0F38 ---*/
22741 /*------------------------------------------------------------*/
/* Decode instructions following a 0F 38 escape. On success returns the
   updated delta (guest-code offset); on failure returns deltaIN unchanged,
   which the caller treats as "decode failed". */
22743 __attribute__((noinline
))
22745 Long
dis_ESC_0F38 (
22746 /*MB_OUT*/DisResult
* dres
,
22747 const VexArchInfo
* archinfo
,
22748 const VexAbiInfo
* vbi
,
22749 Prefix pfx
, Int sz
, Long deltaIN
22752 Long delta
= deltaIN
;
22753 UChar opc
= getUChar(delta
);
22757 case 0xF0: /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */
22758 case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */
/* MOVBE: only plain (no F2/F3, no VEX) encodings at 16/32/64 bits. */
22759 if (!haveF2orF3(pfx
) && !haveVEX(pfx
)
22760 && (sz
== 2 || sz
== 4 || sz
== 8)) {
22761 IRTemp addr
= IRTemp_INVALID
;
22765 modrm
= getUChar(delta
);
/* MOVBE has no register-to-register form. */
22766 if (epartIsReg(modrm
)) break;
22767 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
22769 IRType ty
= szToITy(sz
);
22770 IRTemp src
= newTemp(ty
);
/* Load form: mem -> byteswap -> register G. */
22771 if (opc
== 0xF0) { /* LOAD */
22772 assign(src
, loadLE(ty
, mkexpr(addr
)));
22773 IRTemp dst
= math_BSWAP(src
, ty
);
22774 putIRegG(sz
, pfx
, modrm
, mkexpr(dst
));
22775 DIP("movbe %s,%s\n", dis_buf
, nameIRegG(sz
, pfx
, modrm
));
/* Store form: register G -> byteswap -> mem. */
22776 } else { /* STORE */
22777 assign(src
, getIRegG(sz
, pfx
, modrm
));
22778 IRTemp dst
= math_BSWAP(src
, ty
);
22779 storeLE(mkexpr(addr
), mkexpr(dst
));
22780 DIP("movbe %s,%s\n", nameIRegG(sz
, pfx
, modrm
), dis_buf
);
22784 /* else fall through; maybe one of the decoders below knows what
22793 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22794 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22795 rather than proceeding indiscriminately. */
/* Each sub-decoder is retried from deltaIN, i.e. from the start of the
   instruction, ignoring earlier failed attempts. */
22797 Bool decode_OK
= False
;
22798 delta
= dis_ESC_0F38__SupSSE3 ( &decode_OK
, vbi
, pfx
, sz
, deltaIN
);
22803 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22804 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22805 rather than proceeding indiscriminately. */
22807 Bool decode_OK
= False
;
22808 delta
= dis_ESC_0F38__SSE4 ( &decode_OK
, vbi
, pfx
, sz
, deltaIN
);
22813 /* Ignore previous decode attempts and restart from the beginning of
22814 the instruction. */
22816 opc
= getUChar(delta
);
22822 /* 66 0F 38 F6 = ADCX r32/64(G), m32/64(E) */
22823 /* F3 0F 38 F6 = ADOX r32/64(G), m32/64(E) */
22824 /* These were introduced in Broadwell. Gate them on AVX so as to at
22825 least reject them on earlier guests. Has no host requirements. */
22826 if (have66noF2noF3(pfx
) && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
/* The 66 prefix selects ADCX but does NOT shrink the operand size. */
22828 sz
= 4; /* 66 prefix but operand size is 4/8 */
/* WithFlagCarryX: add-with-carry updating only the C flag. */
22830 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagCarryX
, True
,
22831 sz
, delta
, "adcx" );
22834 if (haveF3no66noF2(pfx
) && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
)) {
/* WithFlagOverX: ADOX, the O-flag analogue of ADCX. */
22835 delta
= dis_op2_E_G ( vbi
, pfx
, Iop_Add8
, WithFlagOverX
, True
,
22836 sz
, delta
, "adox" );
22839 /* else fall through */
22847 /*decode_failure:*/
22848 return deltaIN
; /* fail */
22852 /*------------------------------------------------------------*/
22854 /*--- Top-level post-escape decoders: dis_ESC_0F3A ---*/
22856 /*------------------------------------------------------------*/
/* Decode instructions following a 0F 3A escape. Mirrors dis_ESC_0F38:
   try each sub-decoder from deltaIN; return the new delta on success,
   or deltaIN unchanged to signal decode failure. */
22858 __attribute__((noinline
))
22860 Long
dis_ESC_0F3A (
22861 /*MB_OUT*/DisResult
* dres
,
22862 const VexArchInfo
* archinfo
,
22863 const VexAbiInfo
* vbi
,
22864 Prefix pfx
, Int sz
, Long deltaIN
22867 Long delta
= deltaIN
;
22868 UChar opc
= getUChar(delta
);
22877 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22878 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22879 rather than proceeding indiscriminately. */
22881 Bool decode_OK
= False
;
22882 delta
= dis_ESC_0F3A__SupSSE3 ( &decode_OK
, vbi
, pfx
, sz
, deltaIN
);
22887 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22888 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22889 rather than proceeding indiscriminately. */
22891 Bool decode_OK
= False
;
22892 delta
= dis_ESC_0F3A__SSE4 ( &decode_OK
, vbi
, pfx
, sz
, deltaIN
);
22897 return deltaIN
; /* fail */
22901 /*------------------------------------------------------------*/
22903 /*--- Top-level post-escape decoders: dis_ESC_0F__VEX ---*/
22905 /*------------------------------------------------------------*/
22907 /* FIXME: common up with the _256_ version below? */
22909 Long
dis_VEX_NDS_128_AnySimdPfx_0F_WIG (
22910 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
22911 Prefix pfx
, Long delta
, const HChar
* name
,
22912 /* The actual operation. Use either 'op' or 'opfn',
22914 IROp op
, IRTemp(*opFn
)(IRTemp
,IRTemp
),
22915 Bool invertLeftArg
,
22919 UChar modrm
= getUChar(delta
);
22920 UInt rD
= gregOfRexRM(pfx
, modrm
);
22921 UInt rSL
= getVexNvvvv(pfx
);
22922 IRTemp tSL
= newTemp(Ity_V128
);
22923 IRTemp tSR
= newTemp(Ity_V128
);
22924 IRTemp addr
= IRTemp_INVALID
;
22927 vassert(0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*WIG?*/);
22929 assign(tSL
, invertLeftArg
? unop(Iop_NotV128
, getXMMReg(rSL
))
22932 if (epartIsReg(modrm
)) {
22933 UInt rSR
= eregOfRexRM(pfx
, modrm
);
22935 assign(tSR
, getXMMReg(rSR
));
22936 DIP("%s %s,%s,%s\n",
22937 name
, nameXMMReg(rSR
), nameXMMReg(rSL
), nameXMMReg(rD
));
22939 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
22941 assign(tSR
, loadLE(Ity_V128
, mkexpr(addr
)));
22942 DIP("%s %s,%s,%s\n",
22943 name
, dis_buf
, nameXMMReg(rSL
), nameXMMReg(rD
));
22946 IRTemp res
= IRTemp_INVALID
;
22947 if (op
!= Iop_INVALID
) {
22948 vassert(opFn
== NULL
);
22949 res
= newTemp(Ity_V128
);
22950 if (requiresRMode(op
)) {
22951 IRTemp rm
= newTemp(Ity_I32
);
22952 assign(rm
, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
22953 assign(res
, swapArgs
22954 ? triop(op
, mkexpr(rm
), mkexpr(tSR
), mkexpr(tSL
))
22955 : triop(op
, mkexpr(rm
), mkexpr(tSL
), mkexpr(tSR
)));
22957 assign(res
, swapArgs
22958 ? binop(op
, mkexpr(tSR
), mkexpr(tSL
))
22959 : binop(op
, mkexpr(tSL
), mkexpr(tSR
)));
22962 vassert(opFn
!= NULL
);
22963 res
= swapArgs
? opFn(tSR
, tSL
) : opFn(tSL
, tSR
);
22966 putYMMRegLoAndZU(rD
, mkexpr(res
));
22973 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp
22974 for the operation, no inversion of the left arg, and no swapping of
22977 Long
dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple (
22978 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
22979 Prefix pfx
, Long delta
, const HChar
* name
,
22983 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22984 uses_vvvv
, vbi
, pfx
, delta
, name
, op
, NULL
, False
, False
);
22988 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR
22989 generator to compute the result, no inversion of the left
22990 arg, and no swapping of args. */
22992 Long
dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex (
22993 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
22994 Prefix pfx
, Long delta
, const HChar
* name
,
22995 IRTemp(*opFn
)(IRTemp
,IRTemp
)
22998 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22999 uses_vvvv
, vbi
, pfx
, delta
, name
,
23000 Iop_INVALID
, opFn
, False
, False
);
23004 /* Vector by scalar shift of V by the amount specified at the bottom
23006 static ULong
dis_AVX128_shiftV_byE ( const VexAbiInfo
* vbi
,
23007 Prefix pfx
, Long delta
,
23008 const HChar
* opname
, IROp op
)
23013 Bool shl
, shr
, sar
;
23014 UChar modrm
= getUChar(delta
);
23015 UInt rG
= gregOfRexRM(pfx
,modrm
);
23016 UInt rV
= getVexNvvvv(pfx
);;
23017 IRTemp g0
= newTemp(Ity_V128
);
23018 IRTemp g1
= newTemp(Ity_V128
);
23019 IRTemp amt
= newTemp(Ity_I64
);
23020 IRTemp amt8
= newTemp(Ity_I8
);
23021 if (epartIsReg(modrm
)) {
23022 UInt rE
= eregOfRexRM(pfx
,modrm
);
23023 assign( amt
, getXMMRegLane64(rE
, 0) );
23024 DIP("%s %s,%s,%s\n", opname
, nameXMMReg(rE
),
23025 nameXMMReg(rV
), nameXMMReg(rG
) );
23028 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23029 assign( amt
, loadLE(Ity_I64
, mkexpr(addr
)) );
23030 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
23033 assign( g0
, getXMMReg(rV
) );
23034 assign( amt8
, unop(Iop_64to8
, mkexpr(amt
)) );
23036 shl
= shr
= sar
= False
;
23039 case Iop_ShlN16x8
: shl
= True
; size
= 32; break;
23040 case Iop_ShlN32x4
: shl
= True
; size
= 32; break;
23041 case Iop_ShlN64x2
: shl
= True
; size
= 64; break;
23042 case Iop_SarN16x8
: sar
= True
; size
= 16; break;
23043 case Iop_SarN32x4
: sar
= True
; size
= 32; break;
23044 case Iop_ShrN16x8
: shr
= True
; size
= 16; break;
23045 case Iop_ShrN32x4
: shr
= True
; size
= 32; break;
23046 case Iop_ShrN64x2
: shr
= True
; size
= 64; break;
23047 default: vassert(0);
23054 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
23055 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
23064 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
23065 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
23066 binop(op
, mkexpr(g0
), mkU8(size
-1))
23073 putYMMRegLoAndZU( rG
, mkexpr(g1
) );
23078 /* Vector by scalar shift of V by the amount specified at the bottom
23080 static ULong
dis_AVX256_shiftV_byE ( const VexAbiInfo
* vbi
,
23081 Prefix pfx
, Long delta
,
23082 const HChar
* opname
, IROp op
)
23087 Bool shl
, shr
, sar
;
23088 UChar modrm
= getUChar(delta
);
23089 UInt rG
= gregOfRexRM(pfx
,modrm
);
23090 UInt rV
= getVexNvvvv(pfx
);;
23091 IRTemp g0
= newTemp(Ity_V256
);
23092 IRTemp g1
= newTemp(Ity_V256
);
23093 IRTemp amt
= newTemp(Ity_I64
);
23094 IRTemp amt8
= newTemp(Ity_I8
);
23095 if (epartIsReg(modrm
)) {
23096 UInt rE
= eregOfRexRM(pfx
,modrm
);
23097 assign( amt
, getXMMRegLane64(rE
, 0) );
23098 DIP("%s %s,%s,%s\n", opname
, nameXMMReg(rE
),
23099 nameYMMReg(rV
), nameYMMReg(rG
) );
23102 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23103 assign( amt
, loadLE(Ity_I64
, mkexpr(addr
)) );
23104 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
) );
23107 assign( g0
, getYMMReg(rV
) );
23108 assign( amt8
, unop(Iop_64to8
, mkexpr(amt
)) );
23110 shl
= shr
= sar
= False
;
23113 case Iop_ShlN16x16
: shl
= True
; size
= 32; break;
23114 case Iop_ShlN32x8
: shl
= True
; size
= 32; break;
23115 case Iop_ShlN64x4
: shl
= True
; size
= 64; break;
23116 case Iop_SarN16x16
: sar
= True
; size
= 16; break;
23117 case Iop_SarN32x8
: sar
= True
; size
= 32; break;
23118 case Iop_ShrN16x16
: shr
= True
; size
= 16; break;
23119 case Iop_ShrN32x8
: shr
= True
; size
= 32; break;
23120 case Iop_ShrN64x4
: shr
= True
; size
= 64; break;
23121 default: vassert(0);
23128 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
23129 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
23130 binop(Iop_V128HLtoV256
, mkV128(0), mkV128(0))
23138 binop(Iop_CmpLT64U
, mkexpr(amt
), mkU64(size
)),
23139 binop(op
, mkexpr(g0
), mkexpr(amt8
)),
23140 binop(op
, mkexpr(g0
), mkU8(size
-1))
23147 putYMMReg( rG
, mkexpr(g1
) );
23152 /* Vector by vector shift of V by the amount specified at the bottom
23153 of E. Vector by vector shifts are defined for all shift amounts,
23154 so not using Iop_S*x* here (and SSE2 doesn't support variable shifts
23156 static ULong
dis_AVX_var_shiftV_byE ( const VexAbiInfo
* vbi
,
23157 Prefix pfx
, Long delta
,
23158 const HChar
* opname
, IROp op
, Bool isYMM
)
23163 UChar modrm
= getUChar(delta
);
23164 UInt rG
= gregOfRexRM(pfx
,modrm
);
23165 UInt rV
= getVexNvvvv(pfx
);;
23166 IRTemp sV
= isYMM
? newTemp(Ity_V256
) : newTemp(Ity_V128
);
23167 IRTemp amt
= isYMM
? newTemp(Ity_V256
) : newTemp(Ity_V128
);
23168 IRTemp amts
[8], sVs
[8], res
[8];
23169 if (epartIsReg(modrm
)) {
23170 UInt rE
= eregOfRexRM(pfx
,modrm
);
23171 assign( amt
, isYMM
? getYMMReg(rE
) : getXMMReg(rE
) );
23173 DIP("%s %s,%s,%s\n", opname
, nameYMMReg(rE
),
23174 nameYMMReg(rV
), nameYMMReg(rG
) );
23176 DIP("%s %s,%s,%s\n", opname
, nameXMMReg(rE
),
23177 nameXMMReg(rV
), nameXMMReg(rG
) );
23181 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23182 assign( amt
, loadLE(isYMM
? Ity_V256
: Ity_V128
, mkexpr(addr
)) );
23184 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameYMMReg(rV
),
23187 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameXMMReg(rV
),
23192 assign( sV
, isYMM
? getYMMReg(rV
) : getXMMReg(rV
) );
23196 case Iop_Shl32
: size
= 32; break;
23197 case Iop_Shl64
: size
= 64; break;
23198 case Iop_Sar32
: size
= 32; break;
23199 case Iop_Shr32
: size
= 32; break;
23200 case Iop_Shr64
: size
= 64; break;
23201 default: vassert(0);
23204 for (i
= 0; i
< 8; i
++) {
23205 sVs
[i
] = IRTemp_INVALID
;
23206 amts
[i
] = IRTemp_INVALID
;
23211 breakupV256to32s( sV
, &sVs
[7], &sVs
[6], &sVs
[5], &sVs
[4],
23212 &sVs
[3], &sVs
[2], &sVs
[1], &sVs
[0] );
23213 breakupV256to32s( amt
, &amts
[7], &amts
[6], &amts
[5], &amts
[4],
23214 &amts
[3], &amts
[2], &amts
[1], &amts
[0] );
23216 breakupV128to32s( sV
, &sVs
[3], &sVs
[2], &sVs
[1], &sVs
[0] );
23217 breakupV128to32s( amt
, &amts
[3], &amts
[2], &amts
[1], &amts
[0] );
23222 breakupV256to64s( sV
, &sVs
[3], &sVs
[2], &sVs
[1], &sVs
[0] );
23223 breakupV256to64s( amt
, &amts
[3], &amts
[2], &amts
[1], &amts
[0] );
23225 breakupV128to64s( sV
, &sVs
[1], &sVs
[0] );
23226 breakupV128to64s( amt
, &amts
[1], &amts
[0] );
23229 default: vassert(0);
23231 for (i
= 0; i
< 8; i
++)
23232 if (sVs
[i
] != IRTemp_INVALID
) {
23233 res
[i
] = size
== 32 ? newTemp(Ity_I32
) : newTemp(Ity_I64
);
23236 binop(size
== 32 ? Iop_CmpLT32U
: Iop_CmpLT64U
,
23238 size
== 32 ? mkU32(size
) : mkU64(size
)),
23239 binop(op
, mkexpr(sVs
[i
]),
23240 unop(size
== 32 ? Iop_32to8
: Iop_64to8
,
23242 op
== Iop_Sar32
? binop(op
, mkexpr(sVs
[i
]), mkU8(size
-1))
23243 : size
== 32 ? mkU32(0) : mkU64(0)
23248 for (i
= 0; i
< 8; i
++)
23249 putYMMRegLane32( rG
, i
, (i
< 4 || isYMM
)
23250 ? mkexpr(res
[i
]) : mkU32(0) );
23253 for (i
= 0; i
< 4; i
++)
23254 putYMMRegLane64( rG
, i
, (i
< 2 || isYMM
)
23255 ? mkexpr(res
[i
]) : mkU64(0) );
23257 default: vassert(0);
23264 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23265 version of dis_SSE_shiftE_imm. */
23267 Long
dis_AVX128_shiftE_to_V_imm( Prefix pfx
,
23268 Long delta
, const HChar
* opname
, IROp op
)
23270 Bool shl
, shr
, sar
;
23271 UChar rm
= getUChar(delta
);
23272 IRTemp e0
= newTemp(Ity_V128
);
23273 IRTemp e1
= newTemp(Ity_V128
);
23274 UInt rD
= getVexNvvvv(pfx
);
23276 vassert(epartIsReg(rm
));
23277 vassert(gregLO3ofRM(rm
) == 2
23278 || gregLO3ofRM(rm
) == 4 || gregLO3ofRM(rm
) == 6);
23279 amt
= getUChar(delta
+1);
23281 DIP("%s $%d,%s,%s\n", opname
,
23283 nameXMMReg(eregOfRexRM(pfx
,rm
)),
23285 assign( e0
, getXMMReg(eregOfRexRM(pfx
,rm
)) );
23287 shl
= shr
= sar
= False
;
23290 case Iop_ShlN16x8
: shl
= True
; size
= 16; break;
23291 case Iop_ShlN32x4
: shl
= True
; size
= 32; break;
23292 case Iop_ShlN64x2
: shl
= True
; size
= 64; break;
23293 case Iop_SarN16x8
: sar
= True
; size
= 16; break;
23294 case Iop_SarN32x4
: sar
= True
; size
= 32; break;
23295 case Iop_ShrN16x8
: shr
= True
; size
= 16; break;
23296 case Iop_ShrN32x4
: shr
= True
; size
= 32; break;
23297 case Iop_ShrN64x2
: shr
= True
; size
= 64; break;
23298 default: vassert(0);
23302 assign( e1
, amt
>= size
23304 : binop(op
, mkexpr(e0
), mkU8(amt
))
23308 assign( e1
, amt
>= size
23309 ? binop(op
, mkexpr(e0
), mkU8(size
-1))
23310 : binop(op
, mkexpr(e0
), mkU8(amt
))
23316 putYMMRegLoAndZU( rD
, mkexpr(e1
) );
23321 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23322 version of dis_AVX128_shiftE_to_V_imm. */
23324 Long
dis_AVX256_shiftE_to_V_imm( Prefix pfx
,
23325 Long delta
, const HChar
* opname
, IROp op
)
23327 Bool shl
, shr
, sar
;
23328 UChar rm
= getUChar(delta
);
23329 IRTemp e0
= newTemp(Ity_V256
);
23330 IRTemp e1
= newTemp(Ity_V256
);
23331 UInt rD
= getVexNvvvv(pfx
);
23333 vassert(epartIsReg(rm
));
23334 vassert(gregLO3ofRM(rm
) == 2
23335 || gregLO3ofRM(rm
) == 4 || gregLO3ofRM(rm
) == 6);
23336 amt
= getUChar(delta
+1);
23338 DIP("%s $%d,%s,%s\n", opname
,
23340 nameYMMReg(eregOfRexRM(pfx
,rm
)),
23342 assign( e0
, getYMMReg(eregOfRexRM(pfx
,rm
)) );
23344 shl
= shr
= sar
= False
;
23347 case Iop_ShlN16x16
: shl
= True
; size
= 16; break;
23348 case Iop_ShlN32x8
: shl
= True
; size
= 32; break;
23349 case Iop_ShlN64x4
: shl
= True
; size
= 64; break;
23350 case Iop_SarN16x16
: sar
= True
; size
= 16; break;
23351 case Iop_SarN32x8
: sar
= True
; size
= 32; break;
23352 case Iop_ShrN16x16
: shr
= True
; size
= 16; break;
23353 case Iop_ShrN32x8
: shr
= True
; size
= 32; break;
23354 case Iop_ShrN64x4
: shr
= True
; size
= 64; break;
23355 default: vassert(0);
23360 assign( e1
, amt
>= size
23361 ? binop(Iop_V128HLtoV256
, mkV128(0), mkV128(0))
23362 : binop(op
, mkexpr(e0
), mkU8(amt
))
23366 assign( e1
, amt
>= size
23367 ? binop(op
, mkexpr(e0
), mkU8(size
-1))
23368 : binop(op
, mkexpr(e0
), mkU8(amt
))
23374 putYMMReg( rD
, mkexpr(e1
) );
23379 /* Lower 64-bit lane only AVX128 binary operation:
23380 G[63:0] = V[63:0] `op` E[63:0]
23381 G[127:64] = V[127:64]
23383 The specified op must be of the 64F0x2 kind, so that it
23384 copies the upper half of the left operand to the result.
23386 static Long
dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool
* uses_vvvv
,
23387 const VexAbiInfo
* vbi
,
23388 Prefix pfx
, Long delta
,
23389 const HChar
* opname
, IROp op
)
23394 UChar rm
= getUChar(delta
);
23395 UInt rG
= gregOfRexRM(pfx
,rm
);
23396 UInt rV
= getVexNvvvv(pfx
);
23397 IRExpr
* vpart
= getXMMReg(rV
);
23398 if (epartIsReg(rm
)) {
23399 UInt rE
= eregOfRexRM(pfx
,rm
);
23400 putXMMReg( rG
, binop(op
, vpart
, getXMMReg(rE
)) );
23401 DIP("%s %s,%s,%s\n", opname
,
23402 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
23405 /* We can only do a 64-bit memory read, so the upper half of the
23406 E operand needs to be made simply of zeroes. */
23407 IRTemp epart
= newTemp(Ity_V128
);
23408 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23409 assign( epart
, unop( Iop_64UtoV128
,
23410 loadLE(Ity_I64
, mkexpr(addr
))) );
23411 putXMMReg( rG
, binop(op
, vpart
, mkexpr(epart
)) );
23412 DIP("%s %s,%s,%s\n", opname
,
23413 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
23414 delta
= delta
+alen
;
23416 putYMMRegLane128( rG
, 1, mkV128(0) );
23422 /* Lower 64-bit lane only AVX128 unary operation:
23423 G[63:0] = op(E[63:0])
23424 G[127:64] = V[127:64]
23426 The specified op must be of the 64F0x2 kind, so that it
23427 copies the upper half of the operand to the result.
23429 static Long
dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool
* uses_vvvv
,
23430 const VexAbiInfo
* vbi
,
23431 Prefix pfx
, Long delta
,
23432 const HChar
* opname
, IROp op
)
23437 UChar rm
= getUChar(delta
);
23438 UInt rG
= gregOfRexRM(pfx
,rm
);
23439 UInt rV
= getVexNvvvv(pfx
);
23440 IRTemp e64
= newTemp(Ity_I64
);
23442 /* Fetch E[63:0] */
23443 if (epartIsReg(rm
)) {
23444 UInt rE
= eregOfRexRM(pfx
,rm
);
23445 assign(e64
, getXMMRegLane64(rE
, 0));
23446 DIP("%s %s,%s,%s\n", opname
,
23447 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
23450 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23451 assign(e64
, loadLE(Ity_I64
, mkexpr(addr
)));
23452 DIP("%s %s,%s,%s\n", opname
,
23453 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
23457 /* Create a value 'arg' as V[127:64]++E[63:0] */
23458 IRTemp arg
= newTemp(Ity_V128
);
23460 binop(Iop_SetV128lo64
,
23461 getXMMReg(rV
), mkexpr(e64
)));
23462 /* and apply op to it */
23463 putYMMRegLoAndZU( rG
, unop(op
, mkexpr(arg
)) );
23469 /* Lower 32-bit lane only AVX128 unary operation:
23470 G[31:0] = op(E[31:0])
23471 G[127:32] = V[127:32]
23473 The specified op must be of the 32F0x4 kind, so that it
23474 copies the upper 3/4 of the operand to the result.
23476 static Long
dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool
* uses_vvvv
,
23477 const VexAbiInfo
* vbi
,
23478 Prefix pfx
, Long delta
,
23479 const HChar
* opname
, IROp op
)
23484 UChar rm
= getUChar(delta
);
23485 UInt rG
= gregOfRexRM(pfx
,rm
);
23486 UInt rV
= getVexNvvvv(pfx
);
23487 IRTemp e32
= newTemp(Ity_I32
);
23489 /* Fetch E[31:0] */
23490 if (epartIsReg(rm
)) {
23491 UInt rE
= eregOfRexRM(pfx
,rm
);
23492 assign(e32
, getXMMRegLane32(rE
, 0));
23493 DIP("%s %s,%s,%s\n", opname
,
23494 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
23497 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23498 assign(e32
, loadLE(Ity_I32
, mkexpr(addr
)));
23499 DIP("%s %s,%s,%s\n", opname
,
23500 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
23504 /* Create a value 'arg' as V[127:32]++E[31:0] */
23505 IRTemp arg
= newTemp(Ity_V128
);
23507 binop(Iop_SetV128lo32
,
23508 getXMMReg(rV
), mkexpr(e32
)));
23509 /* and apply op to it */
23510 putYMMRegLoAndZU( rG
, unop(op
, mkexpr(arg
)) );
23516 /* Lower 32-bit lane only AVX128 binary operation:
23517 G[31:0] = V[31:0] `op` E[31:0]
23518 G[127:32] = V[127:32]
23520 The specified op must be of the 32F0x4 kind, so that it
23521 copies the upper 3/4 of the left operand to the result.
23523 static Long
dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool
* uses_vvvv
,
23524 const VexAbiInfo
* vbi
,
23525 Prefix pfx
, Long delta
,
23526 const HChar
* opname
, IROp op
)
23531 UChar rm
= getUChar(delta
);
23532 UInt rG
= gregOfRexRM(pfx
,rm
);
23533 UInt rV
= getVexNvvvv(pfx
);
23534 IRExpr
* vpart
= getXMMReg(rV
);
23535 if (epartIsReg(rm
)) {
23536 UInt rE
= eregOfRexRM(pfx
,rm
);
23537 putXMMReg( rG
, binop(op
, vpart
, getXMMReg(rE
)) );
23538 DIP("%s %s,%s,%s\n", opname
,
23539 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
23542 /* We can only do a 32-bit memory read, so the upper 3/4 of the
23543 E operand needs to be made simply of zeroes. */
23544 IRTemp epart
= newTemp(Ity_V128
);
23545 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23546 assign( epart
, unop( Iop_32UtoV128
,
23547 loadLE(Ity_I32
, mkexpr(addr
))) );
23548 putXMMReg( rG
, binop(op
, vpart
, mkexpr(epart
)) );
23549 DIP("%s %s,%s,%s\n", opname
,
23550 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
23551 delta
= delta
+alen
;
23553 putYMMRegLane128( rG
, 1, mkV128(0) );
23559 /* All-lanes AVX128 binary operation:
23560 G[127:0] = V[127:0] `op` E[127:0]
23563 static Long
dis_AVX128_E_V_to_G ( /*OUT*/Bool
* uses_vvvv
,
23564 const VexAbiInfo
* vbi
,
23565 Prefix pfx
, Long delta
,
23566 const HChar
* opname
, IROp op
)
23568 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23569 uses_vvvv
, vbi
, pfx
, delta
, opname
, op
,
23570 NULL
, False
/*!invertLeftArg*/, False
/*!swapArgs*/
23575 /* Handles AVX128 32F/64F comparisons. A derivative of
23576 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23577 original delta to indicate failure. */
23579 Long
dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool
* uses_vvvv
,
23580 const VexAbiInfo
* vbi
,
23581 Prefix pfx
, Long delta
,
23582 const HChar
* opname
, Bool all_lanes
, Int sz
)
23584 vassert(sz
== 4 || sz
== 8);
23585 Long deltaIN
= delta
;
23590 Bool preZero
= False
;
23591 Bool preSwap
= False
;
23592 IROp op
= Iop_INVALID
;
23593 Bool postNot
= False
;
23594 IRTemp plain
= newTemp(Ity_V128
);
23595 UChar rm
= getUChar(delta
);
23596 UInt rG
= gregOfRexRM(pfx
, rm
);
23597 UInt rV
= getVexNvvvv(pfx
);
23598 IRTemp argL
= newTemp(Ity_V128
);
23599 IRTemp argR
= newTemp(Ity_V128
);
23601 assign(argL
, getXMMReg(rV
));
23602 if (epartIsReg(rm
)) {
23603 imm8
= getUChar(delta
+1);
23604 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
,
23605 imm8
, all_lanes
, sz
);
23606 if (!ok
) return deltaIN
; /* FAIL */
23607 UInt rE
= eregOfRexRM(pfx
,rm
);
23608 assign(argR
, getXMMReg(rE
));
23610 DIP("%s $%u,%s,%s,%s\n",
23612 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
23614 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
23615 imm8
= getUChar(delta
+alen
);
23616 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
,
23617 imm8
, all_lanes
, sz
);
23618 if (!ok
) return deltaIN
; /* FAIL */
23620 all_lanes
? loadLE(Ity_V128
, mkexpr(addr
))
23621 : sz
== 8 ? unop( Iop_64UtoV128
, loadLE(Ity_I64
, mkexpr(addr
)))
23622 : /*sz==4*/ unop( Iop_32UtoV128
, loadLE(Ity_I32
, mkexpr(addr
))));
23624 DIP("%s $%u,%s,%s,%s\n",
23625 opname
, imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
23628 IRTemp argMask
= newTemp(Ity_V128
);
23630 // In this case, preSwap is irrelevant, but it's harmless to honour it
23632 assign(argMask
, mkV128(all_lanes
? 0x0000 : (sz
==4 ? 0xFFF0 : 0xFF00)));
23634 assign(argMask
, mkV128(0xFFFF));
23639 preSwap
? binop(op
, binop(Iop_AndV128
, mkexpr(argR
), mkexpr(argMask
)),
23640 binop(Iop_AndV128
, mkexpr(argL
), mkexpr(argMask
)))
23641 : binop(op
, binop(Iop_AndV128
, mkexpr(argL
), mkexpr(argMask
)),
23642 binop(Iop_AndV128
, mkexpr(argR
), mkexpr(argMask
)))
23646 /* This is simple: just invert the result, if necessary, and
23649 putYMMRegLoAndZU( rG
, unop(Iop_NotV128
, mkexpr(plain
)) );
23651 putYMMRegLoAndZU( rG
, mkexpr(plain
) );
23656 /* More complex. It's a one-lane-only, hence need to possibly
23657 invert only that one lane. But at least the other lanes are
23658 correctly "in" the result, having been copied from the left
23661 IRExpr
* mask
= mkV128(sz
==4 ? 0x000F : 0x00FF);
23662 putYMMRegLoAndZU( rG
, binop(Iop_XorV128
, mkexpr(plain
),
23665 putYMMRegLoAndZU( rG
, mkexpr(plain
) );
23669 /* This is the most complex case. One-lane-only, but the args
23670 were swapped. So we have to possibly invert the bottom lane,
23671 and (definitely) we have to copy the upper lane(s) from argL
23672 since, due to the swapping, what's currently there is from
23673 argR, which is not correct. */
23674 IRTemp res
= newTemp(Ity_V128
);
23675 IRTemp mask
= newTemp(Ity_V128
);
23676 IRTemp notMask
= newTemp(Ity_V128
);
23677 assign(mask
, mkV128(sz
==4 ? 0x000F : 0x00FF));
23678 assign(notMask
, mkV128(sz
==4 ? 0xFFF0 : 0xFF00));
23683 unop(Iop_NotV128
, mkexpr(plain
)),
23685 binop(Iop_AndV128
, mkexpr(argL
), mkexpr(notMask
))));
23692 binop(Iop_AndV128
, mkexpr(argL
), mkexpr(notMask
))));
23694 putYMMRegLoAndZU( rG
, mkexpr(res
) );
23702 /* Handles AVX256 32F/64F comparisons. A derivative of
23703 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23704 original delta to indicate failure. */
23706 Long
dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool
* uses_vvvv
,
23707 const VexAbiInfo
* vbi
,
23708 Prefix pfx
, Long delta
,
23709 const HChar
* opname
, Int sz
)
23711 vassert(sz
== 4 || sz
== 8);
23712 Long deltaIN
= delta
;
23717 Bool preZero
= False
;
23718 Bool preSwap
= False
;
23719 IROp op
= Iop_INVALID
;
23720 Bool postNot
= False
;
23721 IRTemp plain
= newTemp(Ity_V256
);
23722 UChar rm
= getUChar(delta
);
23723 UInt rG
= gregOfRexRM(pfx
, rm
);
23724 UInt rV
= getVexNvvvv(pfx
);
23725 IRTemp argL
= newTemp(Ity_V256
);
23726 IRTemp argR
= newTemp(Ity_V256
);
23727 IRTemp argLhi
= IRTemp_INVALID
;
23728 IRTemp argLlo
= IRTemp_INVALID
;
23729 IRTemp argRhi
= IRTemp_INVALID
;
23730 IRTemp argRlo
= IRTemp_INVALID
;
23732 assign(argL
, getYMMReg(rV
));
23733 if (epartIsReg(rm
)) {
23734 imm8
= getUChar(delta
+1);
23735 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
, imm8
,
23736 True
/*all_lanes*/, sz
);
23737 if (!ok
) return deltaIN
; /* FAIL */
23738 UInt rE
= eregOfRexRM(pfx
,rm
);
23739 assign(argR
, getYMMReg(rE
));
23741 DIP("%s $%u,%s,%s,%s\n",
23743 nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
23745 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
23746 imm8
= getUChar(delta
+alen
);
23747 Bool ok
= findSSECmpOp(&preZero
, &preSwap
, &op
, &postNot
, imm8
,
23748 True
/*all_lanes*/, sz
);
23749 if (!ok
) return deltaIN
; /* FAIL */
23750 assign(argR
, loadLE(Ity_V256
, mkexpr(addr
)) );
23752 DIP("%s $%u,%s,%s,%s\n",
23753 opname
, imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
23756 breakupV256toV128s( preSwap
? argR
: argL
, &argLhi
, &argLlo
);
23757 breakupV256toV128s( preSwap
? argL
: argR
, &argRhi
, &argRlo
);
23759 IRTemp argMask
= newTemp(Ity_V128
);
23761 // In this case, preSwap is irrelevant, but it's harmless to honour it
23763 assign(argMask
, mkV128(0x0000));
23765 assign(argMask
, mkV128(0xFFFF));
23770 binop( Iop_V128HLtoV256
,
23771 binop(op
, binop(Iop_AndV128
, mkexpr(argLhi
), mkexpr(argMask
)),
23772 binop(Iop_AndV128
, mkexpr(argRhi
), mkexpr(argMask
))),
23773 binop(op
, binop(Iop_AndV128
, mkexpr(argLlo
), mkexpr(argMask
)),
23774 binop(Iop_AndV128
, mkexpr(argRlo
), mkexpr(argMask
))))
23777 /* This is simple: just invert the result, if necessary, and
23780 putYMMReg( rG
, unop(Iop_NotV256
, mkexpr(plain
)) );
23782 putYMMReg( rG
, mkexpr(plain
) );
23790 /* Handles AVX128 unary E-to-G all-lanes operations. */
23792 Long
dis_AVX128_E_to_G_unary ( /*OUT*/Bool
* uses_vvvv
,
23793 const VexAbiInfo
* vbi
,
23794 Prefix pfx
, Long delta
,
23795 const HChar
* opname
,
23796 IRTemp (*opFn
)(IRTemp
) )
23801 IRTemp res
= newTemp(Ity_V128
);
23802 IRTemp arg
= newTemp(Ity_V128
);
23803 UChar rm
= getUChar(delta
);
23804 UInt rG
= gregOfRexRM(pfx
, rm
);
23805 if (epartIsReg(rm
)) {
23806 UInt rE
= eregOfRexRM(pfx
,rm
);
23807 assign(arg
, getXMMReg(rE
));
23809 DIP("%s %s,%s\n", opname
, nameXMMReg(rE
), nameXMMReg(rG
));
23811 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23812 assign(arg
, loadLE(Ity_V128
, mkexpr(addr
)));
23814 DIP("%s %s,%s\n", opname
, dis_buf
, nameXMMReg(rG
));
23817 putYMMRegLoAndZU( rG
, mkexpr(res
) );
23818 *uses_vvvv
= False
;
23823 /* Handles AVX128 unary E-to-G all-lanes operations. */
23825 Long
dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool
* uses_vvvv
,
23826 const VexAbiInfo
* vbi
,
23827 Prefix pfx
, Long delta
,
23828 const HChar
* opname
, IROp op
)
23833 IRTemp arg
= newTemp(Ity_V128
);
23834 UChar rm
= getUChar(delta
);
23835 UInt rG
= gregOfRexRM(pfx
, rm
);
23836 if (epartIsReg(rm
)) {
23837 UInt rE
= eregOfRexRM(pfx
,rm
);
23838 assign(arg
, getXMMReg(rE
));
23840 DIP("%s %s,%s\n", opname
, nameXMMReg(rE
), nameXMMReg(rG
));
23842 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23843 assign(arg
, loadLE(Ity_V128
, mkexpr(addr
)));
23845 DIP("%s %s,%s\n", opname
, dis_buf
, nameXMMReg(rG
));
23847 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
23848 // up in the usual way.
23849 Bool needsIRRM
= op
== Iop_Sqrt32Fx4
|| op
== Iop_Sqrt64Fx2
;
23850 /* XXXROUNDINGFIXME */
23851 IRExpr
* res
= needsIRRM
? binop(op
, get_FAKE_roundingmode(), mkexpr(arg
))
23852 : unop(op
, mkexpr(arg
));
23853 putYMMRegLoAndZU( rG
, res
);
23854 *uses_vvvv
= False
;
23859 /* FIXME: common up with the _128_ version above? */
23861 Long
dis_VEX_NDS_256_AnySimdPfx_0F_WIG (
23862 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
23863 Prefix pfx
, Long delta
, const HChar
* name
,
23864 /* The actual operation. Use either 'op' or 'opfn',
23866 IROp op
, IRTemp(*opFn
)(IRTemp
,IRTemp
),
23867 Bool invertLeftArg
,
23871 UChar modrm
= getUChar(delta
);
23872 UInt rD
= gregOfRexRM(pfx
, modrm
);
23873 UInt rSL
= getVexNvvvv(pfx
);
23874 IRTemp tSL
= newTemp(Ity_V256
);
23875 IRTemp tSR
= newTemp(Ity_V256
);
23876 IRTemp addr
= IRTemp_INVALID
;
23879 vassert(1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*WIG?*/);
23881 assign(tSL
, invertLeftArg
? unop(Iop_NotV256
, getYMMReg(rSL
))
23884 if (epartIsReg(modrm
)) {
23885 UInt rSR
= eregOfRexRM(pfx
, modrm
);
23887 assign(tSR
, getYMMReg(rSR
));
23888 DIP("%s %s,%s,%s\n",
23889 name
, nameYMMReg(rSR
), nameYMMReg(rSL
), nameYMMReg(rD
));
23891 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23893 assign(tSR
, loadLE(Ity_V256
, mkexpr(addr
)));
23894 DIP("%s %s,%s,%s\n",
23895 name
, dis_buf
, nameYMMReg(rSL
), nameYMMReg(rD
));
23898 IRTemp res
= IRTemp_INVALID
;
23899 if (op
!= Iop_INVALID
) {
23900 vassert(opFn
== NULL
);
23901 res
= newTemp(Ity_V256
);
23902 if (requiresRMode(op
)) {
23903 IRTemp rm
= newTemp(Ity_I32
);
23904 assign(rm
, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
23905 assign(res
, swapArgs
23906 ? triop(op
, mkexpr(rm
), mkexpr(tSR
), mkexpr(tSL
))
23907 : triop(op
, mkexpr(rm
), mkexpr(tSL
), mkexpr(tSR
)));
23909 assign(res
, swapArgs
23910 ? binop(op
, mkexpr(tSR
), mkexpr(tSL
))
23911 : binop(op
, mkexpr(tSL
), mkexpr(tSR
)));
23914 vassert(opFn
!= NULL
);
23915 res
= swapArgs
? opFn(tSR
, tSL
) : opFn(tSL
, tSR
);
23918 putYMMReg(rD
, mkexpr(res
));
23925 /* All-lanes AVX256 binary operation:
23926 G[255:0] = V[255:0] `op` E[255:0]
23928 static Long
dis_AVX256_E_V_to_G ( /*OUT*/Bool
* uses_vvvv
,
23929 const VexAbiInfo
* vbi
,
23930 Prefix pfx
, Long delta
,
23931 const HChar
* opname
, IROp op
)
23933 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23934 uses_vvvv
, vbi
, pfx
, delta
, opname
, op
,
23935 NULL
, False
/*!invertLeftArg*/, False
/*!swapArgs*/
23940 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, with a simple IROp
23941 for the operation, no inversion of the left arg, and no swapping of
23944 Long
dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple (
23945 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
23946 Prefix pfx
, Long delta
, const HChar
* name
,
23950 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23951 uses_vvvv
, vbi
, pfx
, delta
, name
, op
, NULL
, False
, False
);
23955 /* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, using the given IR
23956 generator to compute the result, no inversion of the left
23957 arg, and no swapping of args. */
23959 Long
dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex (
23960 /*OUT*/Bool
* uses_vvvv
, const VexAbiInfo
* vbi
,
23961 Prefix pfx
, Long delta
, const HChar
* name
,
23962 IRTemp(*opFn
)(IRTemp
,IRTemp
)
23965 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23966 uses_vvvv
, vbi
, pfx
, delta
, name
,
23967 Iop_INVALID
, opFn
, False
, False
);
23971 /* Handles AVX256 unary E-to-G all-lanes operations. */
23973 Long
dis_AVX256_E_to_G_unary ( /*OUT*/Bool
* uses_vvvv
,
23974 const VexAbiInfo
* vbi
,
23975 Prefix pfx
, Long delta
,
23976 const HChar
* opname
,
23977 IRTemp (*opFn
)(IRTemp
) )
23982 IRTemp res
= newTemp(Ity_V256
);
23983 IRTemp arg
= newTemp(Ity_V256
);
23984 UChar rm
= getUChar(delta
);
23985 UInt rG
= gregOfRexRM(pfx
, rm
);
23986 if (epartIsReg(rm
)) {
23987 UInt rE
= eregOfRexRM(pfx
,rm
);
23988 assign(arg
, getYMMReg(rE
));
23990 DIP("%s %s,%s\n", opname
, nameYMMReg(rE
), nameYMMReg(rG
));
23992 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
23993 assign(arg
, loadLE(Ity_V256
, mkexpr(addr
)));
23995 DIP("%s %s,%s\n", opname
, dis_buf
, nameYMMReg(rG
));
23998 putYMMReg( rG
, mkexpr(res
) );
23999 *uses_vvvv
= False
;
24004 /* Handles AVX256 unary E-to-G all-lanes operations. */
24006 Long
dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool
* uses_vvvv
,
24007 const VexAbiInfo
* vbi
,
24008 Prefix pfx
, Long delta
,
24009 const HChar
* opname
, IROp op
)
24014 IRTemp arg
= newTemp(Ity_V256
);
24015 UChar rm
= getUChar(delta
);
24016 UInt rG
= gregOfRexRM(pfx
, rm
);
24017 if (epartIsReg(rm
)) {
24018 UInt rE
= eregOfRexRM(pfx
,rm
);
24019 assign(arg
, getYMMReg(rE
));
24021 DIP("%s %s,%s\n", opname
, nameYMMReg(rE
), nameYMMReg(rG
));
24023 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24024 assign(arg
, loadLE(Ity_V256
, mkexpr(addr
)));
24026 DIP("%s %s,%s\n", opname
, dis_buf
, nameYMMReg(rG
));
24028 putYMMReg( rG
, unop(op
, mkexpr(arg
)) );
24029 *uses_vvvv
= False
;
24034 /* The use of ReinterpF64asI64 is ugly.  Surely could do better if we
24035 had a variant of Iop_64x4toV256 that took F64s as args instead. */
/* VCVTDQ2PD, 256-bit form: widen four signed 32-bit ints (from
   xmm/m128) to four F64s in the destination YMM register.
   I32StoF64 is exact, so no rounding mode is needed.
   NOTE(review): this extract is missing the "Long delta" parameter
   line, the locals (alen/dis_buf), the "delta" advances, the
   "} else {" line, the construction of |res| from the four 64-bit
   pieces (presumably Iop_64x4toV256), and the final return. */
24036 static Long
dis_CVTDQ2PD_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
24039 IRTemp addr
= IRTemp_INVALID
;
/* sV: the 128-bit source holding four I32 lanes. */
24042 UChar modrm
= getUChar(delta
);
24043 IRTemp sV
= newTemp(Ity_V128
);
24044 UInt rG
= gregOfRexRM(pfx
,modrm
);
/* Register-form source: read xmmE. */
24045 if (epartIsReg(modrm
)) {
24046 UInt rE
= eregOfRexRM(pfx
,modrm
);
24047 assign( sV
, getXMMReg(rE
) );
24049 DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
/* Memory-form source (else arm; "} else {" absent from extract). */
24051 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24052 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
24054 DIP("vcvtdq2pd %s,%s\n", dis_buf
, nameYMMReg(rG
) );
/* Split the source into its four 32-bit lanes (s3 = most
   significant ... s0 = least significant). */
24056 IRTemp s3
, s2
, s1
, s0
;
24057 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
24058 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
/* Convert each I32 lane to F64, then reinterpret the F64 bits as
   I64 so the four pieces can be glued into one V256. */
24062 unop(Iop_ReinterpF64asI64
, unop(Iop_I32StoF64
, mkexpr(s3
))),
24063 unop(Iop_ReinterpF64asI64
, unop(Iop_I32StoF64
, mkexpr(s2
))),
24064 unop(Iop_ReinterpF64asI64
, unop(Iop_I32StoF64
, mkexpr(s1
))),
24065 unop(Iop_ReinterpF64asI64
, unop(Iop_I32StoF64
, mkexpr(s0
)))
/* Write all 256 bits of the result to ymmG. */
24067 putYMMReg(rG
, res
);
/* VCVTPD2PS, 256-bit form ("vcvtpd2psy"): narrow four F64s (from
   ymm/m256) to four F32s in the low 128 bits of the destination,
   zeroing the upper 128 bits.  F64->F32 can lose precision, so the
   current SSE rounding mode is honoured.
   NOTE(review): this extract is missing the "Long delta" parameter
   line, the locals (alen/dis_buf), the "delta" advances, the
   "} else {" line, the "#undef CVT", and the final return --
   confirm against the upstream file. */
24072 static Long
dis_CVTPD2PS_256 ( const VexAbiInfo
* vbi
, Prefix pfx
,
24075 IRTemp addr
= IRTemp_INVALID
;
/* argV: 256-bit source (four F64 lanes); rmode: SSE rounding mode. */
24078 UChar modrm
= getUChar(delta
);
24079 UInt rG
= gregOfRexRM(pfx
,modrm
);
24080 IRTemp argV
= newTemp(Ity_V256
);
24081 IRTemp rmode
= newTemp(Ity_I32
);
/* Register-form source: read ymmE. */
24082 if (epartIsReg(modrm
)) {
24083 UInt rE
= eregOfRexRM(pfx
,modrm
);
24084 assign( argV
, getYMMReg(rE
) );
24086 DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE
), nameXMMReg(rG
));
/* Memory-form source (else arm; "} else {" absent from extract). */
24088 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24089 assign( argV
, loadLE(Ity_V256
, mkexpr(addr
)) );
24091 DIP("vcvtpd2psy %s,%s\n", dis_buf
, nameXMMReg(rG
) );
/* Capture the guest's current SSE rounding mode for the narrowing. */
24094 assign( rmode
, get_sse_roundingmode() );
/* Split the source into its four 64-bit lanes (t3 = most
   significant ... t0 = least significant). */
24095 IRTemp t3
, t2
, t1
, t0
;
24096 t3
= t2
= t1
= t0
= IRTemp_INVALID
;
24097 breakupV256to64s( argV
, &t3
, &t2
, &t1
, &t0
);
/* CVT: reinterpret one 64-bit lane as F64, then narrow to F32
   using the captured rounding mode. */
24098 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), \
24099 unop(Iop_ReinterpI64asF64, mkexpr(_t)) )
/* Store the four narrowed results into the low four F32 lanes.
   NOTE(review): the matching "#undef CVT" line is absent from this
   extract. */
24100 putXMMRegLane32F( rG
, 3, CVT(t3
) );
24101 putXMMRegLane32F( rG
, 2, CVT(t2
) );
24102 putXMMRegLane32F( rG
, 1, CVT(t1
) );
24103 putXMMRegLane32F( rG
, 0, CVT(t0
) );
/* Zero the upper 128 bits of the destination, per VEX semantics. */
24105 putYMMRegLane128( rG
, 1, mkV128(0) );
24110 static IRTemp
math_VPUNPCK_YMM ( IRTemp tL
, IRType tR
, IROp op
)
24112 IRTemp tLhi
, tLlo
, tRhi
, tRlo
;
24113 tLhi
= tLlo
= tRhi
= tRlo
= IRTemp_INVALID
;
24114 IRTemp res
= newTemp(Ity_V256
);
24115 breakupV256toV128s( tL
, &tLhi
, &tLlo
);
24116 breakupV256toV128s( tR
, &tRhi
, &tRlo
);
24117 assign( res
, binop( Iop_V128HLtoV256
,
24118 binop( op
, mkexpr(tRhi
), mkexpr(tLhi
) ),
24119 binop( op
, mkexpr(tRlo
), mkexpr(tLlo
) ) ) );
24124 static IRTemp
math_VPUNPCKLBW_YMM ( IRTemp tL
, IRTemp tR
)
24126 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveLO8x16
);
24130 static IRTemp
math_VPUNPCKLWD_YMM ( IRTemp tL
, IRTemp tR
)
24132 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveLO16x8
);
24136 static IRTemp
math_VPUNPCKLDQ_YMM ( IRTemp tL
, IRTemp tR
)
24138 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveLO32x4
);
24142 static IRTemp
math_VPUNPCKLQDQ_YMM ( IRTemp tL
, IRTemp tR
)
24144 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveLO64x2
);
24148 static IRTemp
math_VPUNPCKHBW_YMM ( IRTemp tL
, IRTemp tR
)
24150 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveHI8x16
);
24154 static IRTemp
math_VPUNPCKHWD_YMM ( IRTemp tL
, IRTemp tR
)
24156 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveHI16x8
);
24160 static IRTemp
math_VPUNPCKHDQ_YMM ( IRTemp tL
, IRTemp tR
)
24162 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveHI32x4
);
24166 static IRTemp
math_VPUNPCKHQDQ_YMM ( IRTemp tL
, IRTemp tR
)
24168 return math_VPUNPCK_YMM( tL
, tR
, Iop_InterleaveHI64x2
);
24172 static IRTemp
math_VPACKSSWB_YMM ( IRTemp tL
, IRTemp tR
)
24174 return math_VPUNPCK_YMM( tL
, tR
, Iop_QNarrowBin16Sto8Sx16
);
24178 static IRTemp
math_VPACKUSWB_YMM ( IRTemp tL
, IRTemp tR
)
24180 return math_VPUNPCK_YMM( tL
, tR
, Iop_QNarrowBin16Sto8Ux16
);
24184 static IRTemp
math_VPACKSSDW_YMM ( IRTemp tL
, IRTemp tR
)
24186 return math_VPUNPCK_YMM( tL
, tR
, Iop_QNarrowBin32Sto16Sx8
);
24190 static IRTemp
math_VPACKUSDW_YMM ( IRTemp tL
, IRTemp tR
)
24192 return math_VPUNPCK_YMM( tL
, tR
, Iop_QNarrowBin32Sto16Ux8
);
24196 __attribute__((noinline
))
24198 Long
dis_ESC_0F__VEX (
24199 /*MB_OUT*/DisResult
* dres
,
24200 /*OUT*/ Bool
* uses_vvvv
,
24201 const VexArchInfo
* archinfo
,
24202 const VexAbiInfo
* vbi
,
24203 Prefix pfx
, Int sz
, Long deltaIN
24206 IRTemp addr
= IRTemp_INVALID
;
24209 Long delta
= deltaIN
;
24210 UChar opc
= getUChar(delta
);
24212 *uses_vvvv
= False
;
24217 /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24218 /* Move 64 bits from E (mem only) to G (lo half xmm).
24219 Bits 255-64 of the dest are zeroed out. */
24220 if (haveF2no66noF3(pfx
) && !epartIsReg(getUChar(delta
))) {
24221 UChar modrm
= getUChar(delta
);
24222 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24223 UInt rG
= gregOfRexRM(pfx
,modrm
);
24224 IRTemp z128
= newTemp(Ity_V128
);
24225 assign(z128
, mkV128(0));
24226 putXMMReg( rG
, mkexpr(z128
) );
24227 /* FIXME: ALIGNMENT CHECK? */
24228 putXMMRegLane64( rG
, 0, loadLE(Ity_I64
, mkexpr(addr
)) );
24229 putYMMRegLane128( rG
, 1, mkexpr(z128
) );
24230 DIP("vmovsd %s,%s\n", dis_buf
, nameXMMReg(rG
));
24232 goto decode_success
;
24234 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24236 if (haveF2no66noF3(pfx
) && epartIsReg(getUChar(delta
))) {
24237 UChar modrm
= getUChar(delta
);
24238 UInt rG
= gregOfRexRM(pfx
, modrm
);
24239 UInt rE
= eregOfRexRM(pfx
, modrm
);
24240 UInt rV
= getVexNvvvv(pfx
);
24242 DIP("vmovsd %s,%s,%s\n",
24243 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
24244 IRTemp res
= newTemp(Ity_V128
);
24245 assign(res
, binop(Iop_64HLtoV128
,
24246 getXMMRegLane64(rV
, 1),
24247 getXMMRegLane64(rE
, 0)));
24248 putYMMRegLoAndZU(rG
, mkexpr(res
));
24250 goto decode_success
;
24252 /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24253 /* Move 32 bits from E (mem only) to G (lo half xmm).
24254 Bits 255-32 of the dest are zeroed out. */
24255 if (haveF3no66noF2(pfx
) && !epartIsReg(getUChar(delta
))) {
24256 UChar modrm
= getUChar(delta
);
24257 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24258 UInt rG
= gregOfRexRM(pfx
,modrm
);
24259 IRTemp z128
= newTemp(Ity_V128
);
24260 assign(z128
, mkV128(0));
24261 putXMMReg( rG
, mkexpr(z128
) );
24262 /* FIXME: ALIGNMENT CHECK? */
24263 putXMMRegLane32( rG
, 0, loadLE(Ity_I32
, mkexpr(addr
)) );
24264 putYMMRegLane128( rG
, 1, mkexpr(z128
) );
24265 DIP("vmovss %s,%s\n", dis_buf
, nameXMMReg(rG
));
24267 goto decode_success
;
24269 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24271 if (haveF3no66noF2(pfx
) && epartIsReg(getUChar(delta
))) {
24272 UChar modrm
= getUChar(delta
);
24273 UInt rG
= gregOfRexRM(pfx
, modrm
);
24274 UInt rE
= eregOfRexRM(pfx
, modrm
);
24275 UInt rV
= getVexNvvvv(pfx
);
24277 DIP("vmovss %s,%s,%s\n",
24278 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
24279 IRTemp res
= newTemp(Ity_V128
);
24280 assign( res
, binop( Iop_64HLtoV128
,
24281 getXMMRegLane64(rV
, 1),
24282 binop(Iop_32HLto64
,
24283 getXMMRegLane32(rV
, 1),
24284 getXMMRegLane32(rE
, 0)) ) );
24285 putYMMRegLoAndZU(rG
, mkexpr(res
));
24287 goto decode_success
;
24289 /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */
24290 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24291 UChar modrm
= getUChar(delta
);
24292 UInt rG
= gregOfRexRM(pfx
, modrm
);
24293 if (epartIsReg(modrm
)) {
24294 UInt rE
= eregOfRexRM(pfx
,modrm
);
24295 putYMMRegLoAndZU( rG
, getXMMReg( rE
));
24296 DIP("vmovupd %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
24299 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24300 putYMMRegLoAndZU( rG
, loadLE(Ity_V128
, mkexpr(addr
)) );
24301 DIP("vmovupd %s,%s\n", dis_buf
, nameXMMReg(rG
));
24304 goto decode_success
;
24306 /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */
24307 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24308 UChar modrm
= getUChar(delta
);
24309 UInt rG
= gregOfRexRM(pfx
, modrm
);
24310 if (epartIsReg(modrm
)) {
24311 UInt rE
= eregOfRexRM(pfx
,modrm
);
24312 putYMMReg( rG
, getYMMReg( rE
));
24313 DIP("vmovupd %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
24316 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24317 putYMMReg( rG
, loadLE(Ity_V256
, mkexpr(addr
)) );
24318 DIP("vmovupd %s,%s\n", dis_buf
, nameYMMReg(rG
));
24321 goto decode_success
;
24323 /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */
24324 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24325 UChar modrm
= getUChar(delta
);
24326 UInt rG
= gregOfRexRM(pfx
, modrm
);
24327 if (epartIsReg(modrm
)) {
24328 UInt rE
= eregOfRexRM(pfx
,modrm
);
24329 putYMMRegLoAndZU( rG
, getXMMReg( rE
));
24330 DIP("vmovups %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
24333 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24334 putYMMRegLoAndZU( rG
, loadLE(Ity_V128
, mkexpr(addr
)) );
24335 DIP("vmovups %s,%s\n", dis_buf
, nameXMMReg(rG
));
24338 goto decode_success
;
24340 /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */
24341 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24342 UChar modrm
= getUChar(delta
);
24343 UInt rG
= gregOfRexRM(pfx
, modrm
);
24344 if (epartIsReg(modrm
)) {
24345 UInt rE
= eregOfRexRM(pfx
,modrm
);
24346 putYMMReg( rG
, getYMMReg( rE
));
24347 DIP("vmovups %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
24350 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24351 putYMMReg( rG
, loadLE(Ity_V256
, mkexpr(addr
)) );
24352 DIP("vmovups %s,%s\n", dis_buf
, nameYMMReg(rG
));
24355 goto decode_success
;
24360 /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */
24361 /* Move 64 bits from G (low half xmm) to mem only. */
24362 if (haveF2no66noF3(pfx
) && !epartIsReg(getUChar(delta
))) {
24363 UChar modrm
= getUChar(delta
);
24364 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24365 UInt rG
= gregOfRexRM(pfx
,modrm
);
24366 /* FIXME: ALIGNMENT CHECK? */
24367 storeLE( mkexpr(addr
), getXMMRegLane64(rG
, 0));
24368 DIP("vmovsd %s,%s\n", nameXMMReg(rG
), dis_buf
);
24370 goto decode_success
;
24372 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */
24374 if (haveF2no66noF3(pfx
) && epartIsReg(getUChar(delta
))) {
24375 UChar modrm
= getUChar(delta
);
24376 UInt rG
= gregOfRexRM(pfx
, modrm
);
24377 UInt rE
= eregOfRexRM(pfx
, modrm
);
24378 UInt rV
= getVexNvvvv(pfx
);
24380 DIP("vmovsd %s,%s,%s\n",
24381 nameXMMReg(rG
), nameXMMReg(rV
), nameXMMReg(rE
));
24382 IRTemp res
= newTemp(Ity_V128
);
24383 assign(res
, binop(Iop_64HLtoV128
,
24384 getXMMRegLane64(rV
, 1),
24385 getXMMRegLane64(rG
, 0)));
24386 putYMMRegLoAndZU(rE
, mkexpr(res
));
24388 goto decode_success
;
24390 /* VMOVSS xmm1, m64 = VEX.LIG.F3.0F.WIG 11 /r */
24391 /* Move 32 bits from G (low 1/4 xmm) to mem only. */
24392 if (haveF3no66noF2(pfx
) && !epartIsReg(getUChar(delta
))) {
24393 UChar modrm
= getUChar(delta
);
24394 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24395 UInt rG
= gregOfRexRM(pfx
,modrm
);
24396 /* FIXME: ALIGNMENT CHECK? */
24397 storeLE( mkexpr(addr
), getXMMRegLane32(rG
, 0));
24398 DIP("vmovss %s,%s\n", nameXMMReg(rG
), dis_buf
);
24400 goto decode_success
;
24402 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */
24404 if (haveF3no66noF2(pfx
) && epartIsReg(getUChar(delta
))) {
24405 UChar modrm
= getUChar(delta
);
24406 UInt rG
= gregOfRexRM(pfx
, modrm
);
24407 UInt rE
= eregOfRexRM(pfx
, modrm
);
24408 UInt rV
= getVexNvvvv(pfx
);
24410 DIP("vmovss %s,%s,%s\n",
24411 nameXMMReg(rG
), nameXMMReg(rV
), nameXMMReg(rE
));
24412 IRTemp res
= newTemp(Ity_V128
);
24413 assign( res
, binop( Iop_64HLtoV128
,
24414 getXMMRegLane64(rV
, 1),
24415 binop(Iop_32HLto64
,
24416 getXMMRegLane32(rV
, 1),
24417 getXMMRegLane32(rG
, 0)) ) );
24418 putYMMRegLoAndZU(rE
, mkexpr(res
));
24420 goto decode_success
;
24422 /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */
24423 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24424 UChar modrm
= getUChar(delta
);
24425 UInt rG
= gregOfRexRM(pfx
,modrm
);
24426 if (epartIsReg(modrm
)) {
24427 UInt rE
= eregOfRexRM(pfx
,modrm
);
24428 putYMMRegLoAndZU( rE
, getXMMReg(rG
) );
24429 DIP("vmovupd %s,%s\n", nameXMMReg(rG
), nameXMMReg(rE
));
24432 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24433 storeLE( mkexpr(addr
), getXMMReg(rG
) );
24434 DIP("vmovupd %s,%s\n", nameXMMReg(rG
), dis_buf
);
24437 goto decode_success
;
24439 /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */
24440 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24441 UChar modrm
= getUChar(delta
);
24442 UInt rG
= gregOfRexRM(pfx
,modrm
);
24443 if (epartIsReg(modrm
)) {
24444 UInt rE
= eregOfRexRM(pfx
,modrm
);
24445 putYMMReg( rE
, getYMMReg(rG
) );
24446 DIP("vmovupd %s,%s\n", nameYMMReg(rG
), nameYMMReg(rE
));
24449 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24450 storeLE( mkexpr(addr
), getYMMReg(rG
) );
24451 DIP("vmovupd %s,%s\n", nameYMMReg(rG
), dis_buf
);
24454 goto decode_success
;
24456 /* VMOVUPS xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */
24457 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24458 UChar modrm
= getUChar(delta
);
24459 UInt rG
= gregOfRexRM(pfx
,modrm
);
24460 if (epartIsReg(modrm
)) {
24461 UInt rE
= eregOfRexRM(pfx
,modrm
);
24462 putYMMRegLoAndZU( rE
, getXMMReg(rG
) );
24463 DIP("vmovups %s,%s\n", nameXMMReg(rG
), nameXMMReg(rE
));
24466 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24467 storeLE( mkexpr(addr
), getXMMReg(rG
) );
24468 DIP("vmovups %s,%s\n", nameXMMReg(rG
), dis_buf
);
24471 goto decode_success
;
24473 /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */
24474 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24475 UChar modrm
= getUChar(delta
);
24476 UInt rG
= gregOfRexRM(pfx
,modrm
);
24477 if (epartIsReg(modrm
)) {
24478 UInt rE
= eregOfRexRM(pfx
,modrm
);
24479 putYMMReg( rE
, getYMMReg(rG
) );
24480 DIP("vmovups %s,%s\n", nameYMMReg(rG
), nameYMMReg(rE
));
24483 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24484 storeLE( mkexpr(addr
), getYMMReg(rG
) );
24485 DIP("vmovups %s,%s\n", nameYMMReg(rG
), dis_buf
);
24488 goto decode_success
;
24493 /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG /12 r */
24494 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24495 delta
= dis_MOVDDUP_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
24496 goto decode_success
;
24498 /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG /12 r */
24499 if (haveF2no66noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24500 delta
= dis_MOVDDUP_256( vbi
, pfx
, delta
);
24501 goto decode_success
;
24503 /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */
24504 /* Insn only exists in reg form */
24505 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
24506 && epartIsReg(getUChar(delta
))) {
24507 UChar modrm
= getUChar(delta
);
24508 UInt rG
= gregOfRexRM(pfx
, modrm
);
24509 UInt rE
= eregOfRexRM(pfx
, modrm
);
24510 UInt rV
= getVexNvvvv(pfx
);
24512 DIP("vmovhlps %s,%s,%s\n",
24513 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
24514 IRTemp res
= newTemp(Ity_V128
);
24515 assign(res
, binop(Iop_64HLtoV128
,
24516 getXMMRegLane64(rV
, 1),
24517 getXMMRegLane64(rE
, 1)));
24518 putYMMRegLoAndZU(rG
, mkexpr(res
));
24520 goto decode_success
;
24522 /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */
24523 /* Insn exists only in mem form, it appears. */
24524 /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */
24525 /* Insn exists only in mem form, it appears. */
24526 if ((have66noF2noF3(pfx
) || haveNo66noF2noF3(pfx
))
24527 && 0==getVexL(pfx
)/*128*/ && !epartIsReg(getUChar(delta
))) {
24528 UChar modrm
= getUChar(delta
);
24529 UInt rG
= gregOfRexRM(pfx
, modrm
);
24530 UInt rV
= getVexNvvvv(pfx
);
24531 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24533 DIP("vmovlpd %s,%s,%s\n",
24534 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
24535 IRTemp res
= newTemp(Ity_V128
);
24536 assign(res
, binop(Iop_64HLtoV128
,
24537 getXMMRegLane64(rV
, 1),
24538 loadLE(Ity_I64
, mkexpr(addr
))));
24539 putYMMRegLoAndZU(rG
, mkexpr(res
));
24541 goto decode_success
;
24543 /* VMOVSLDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 12 /r */
24544 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*128*/) {
24545 delta
= dis_MOVSxDUP_128( vbi
, pfx
, delta
, True
/*isAvx*/,
24547 goto decode_success
;
24549 /* VMOVSLDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 12 /r */
24550 if (haveF3no66noF2(pfx
) && 1==getVexL(pfx
)/*256*/) {
24551 delta
= dis_MOVSxDUP_256( vbi
, pfx
, delta
, True
/*isL*/ );
24552 goto decode_success
;
24557 /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */
24558 /* Insn exists only in mem form, it appears. */
24559 /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */
24560 /* Insn exists only in mem form, it appears. */
24561 if ((have66noF2noF3(pfx
) || haveNo66noF2noF3(pfx
))
24562 && 0==getVexL(pfx
)/*128*/ && !epartIsReg(getUChar(delta
))) {
24563 UChar modrm
= getUChar(delta
);
24564 UInt rG
= gregOfRexRM(pfx
, modrm
);
24565 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24567 storeLE( mkexpr(addr
), getXMMRegLane64( rG
, 0));
24568 DIP("vmovlpd %s,%s\n", nameXMMReg(rG
), dis_buf
);
24569 goto decode_success
;
24575 /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */
24576 /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */
24577 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24578 Bool hi
= opc
== 0x15;
24579 UChar modrm
= getUChar(delta
);
24580 UInt rG
= gregOfRexRM(pfx
,modrm
);
24581 UInt rV
= getVexNvvvv(pfx
);
24582 IRTemp eV
= newTemp(Ity_V128
);
24583 IRTemp vV
= newTemp(Ity_V128
);
24584 assign( vV
, getXMMReg(rV
) );
24585 if (epartIsReg(modrm
)) {
24586 UInt rE
= eregOfRexRM(pfx
,modrm
);
24587 assign( eV
, getXMMReg(rE
) );
24589 DIP("vunpck%sps %s,%s\n", hi
? "h" : "l",
24590 nameXMMReg(rE
), nameXMMReg(rG
));
24592 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24593 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
24595 DIP("vunpck%sps %s,%s\n", hi
? "h" : "l",
24596 dis_buf
, nameXMMReg(rG
));
24598 IRTemp res
= math_UNPCKxPS_128( eV
, vV
, hi
);
24599 putYMMRegLoAndZU( rG
, mkexpr(res
) );
24601 goto decode_success
;
24603 /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */
24604 /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */
24605 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24606 Bool hi
= opc
== 0x15;
24607 UChar modrm
= getUChar(delta
);
24608 UInt rG
= gregOfRexRM(pfx
,modrm
);
24609 UInt rV
= getVexNvvvv(pfx
);
24610 IRTemp eV
= newTemp(Ity_V256
);
24611 IRTemp vV
= newTemp(Ity_V256
);
24612 assign( vV
, getYMMReg(rV
) );
24613 if (epartIsReg(modrm
)) {
24614 UInt rE
= eregOfRexRM(pfx
,modrm
);
24615 assign( eV
, getYMMReg(rE
) );
24617 DIP("vunpck%sps %s,%s\n", hi
? "h" : "l",
24618 nameYMMReg(rE
), nameYMMReg(rG
));
24620 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24621 assign( eV
, loadLE(Ity_V256
, mkexpr(addr
)) );
24623 DIP("vunpck%sps %s,%s\n", hi
? "h" : "l",
24624 dis_buf
, nameYMMReg(rG
));
24626 IRTemp res
= math_UNPCKxPS_256( eV
, vV
, hi
);
24627 putYMMReg( rG
, mkexpr(res
) );
24629 goto decode_success
;
24631 /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */
24632 /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */
24633 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24634 Bool hi
= opc
== 0x15;
24635 UChar modrm
= getUChar(delta
);
24636 UInt rG
= gregOfRexRM(pfx
,modrm
);
24637 UInt rV
= getVexNvvvv(pfx
);
24638 IRTemp eV
= newTemp(Ity_V128
);
24639 IRTemp vV
= newTemp(Ity_V128
);
24640 assign( vV
, getXMMReg(rV
) );
24641 if (epartIsReg(modrm
)) {
24642 UInt rE
= eregOfRexRM(pfx
,modrm
);
24643 assign( eV
, getXMMReg(rE
) );
24645 DIP("vunpck%spd %s,%s\n", hi
? "h" : "l",
24646 nameXMMReg(rE
), nameXMMReg(rG
));
24648 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24649 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
24651 DIP("vunpck%spd %s,%s\n", hi
? "h" : "l",
24652 dis_buf
, nameXMMReg(rG
));
24654 IRTemp res
= math_UNPCKxPD_128( eV
, vV
, hi
);
24655 putYMMRegLoAndZU( rG
, mkexpr(res
) );
24657 goto decode_success
;
24659 /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */
24660 /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */
24661 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24662 Bool hi
= opc
== 0x15;
24663 UChar modrm
= getUChar(delta
);
24664 UInt rG
= gregOfRexRM(pfx
,modrm
);
24665 UInt rV
= getVexNvvvv(pfx
);
24666 IRTemp eV
= newTemp(Ity_V256
);
24667 IRTemp vV
= newTemp(Ity_V256
);
24668 assign( vV
, getYMMReg(rV
) );
24669 if (epartIsReg(modrm
)) {
24670 UInt rE
= eregOfRexRM(pfx
,modrm
);
24671 assign( eV
, getYMMReg(rE
) );
24673 DIP("vunpck%spd %s,%s\n", hi
? "h" : "l",
24674 nameYMMReg(rE
), nameYMMReg(rG
));
24676 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24677 assign( eV
, loadLE(Ity_V256
, mkexpr(addr
)) );
24679 DIP("vunpck%spd %s,%s\n", hi
? "h" : "l",
24680 dis_buf
, nameYMMReg(rG
));
24682 IRTemp res
= math_UNPCKxPD_256( eV
, vV
, hi
);
24683 putYMMReg( rG
, mkexpr(res
) );
24685 goto decode_success
;
24690 /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */
24691 /* Insn only exists in reg form */
24692 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
24693 && epartIsReg(getUChar(delta
))) {
24694 UChar modrm
= getUChar(delta
);
24695 UInt rG
= gregOfRexRM(pfx
, modrm
);
24696 UInt rE
= eregOfRexRM(pfx
, modrm
);
24697 UInt rV
= getVexNvvvv(pfx
);
24699 DIP("vmovlhps %s,%s,%s\n",
24700 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
24701 IRTemp res
= newTemp(Ity_V128
);
24702 assign(res
, binop(Iop_64HLtoV128
,
24703 getXMMRegLane64(rE
, 0),
24704 getXMMRegLane64(rV
, 0)));
24705 putYMMRegLoAndZU(rG
, mkexpr(res
));
24707 goto decode_success
;
24709 /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */
24710 /* Insn exists only in mem form, it appears. */
24711 /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */
24712 /* Insn exists only in mem form, it appears. */
24713 if ((have66noF2noF3(pfx
) || haveNo66noF2noF3(pfx
))
24714 && 0==getVexL(pfx
)/*128*/ && !epartIsReg(getUChar(delta
))) {
24715 UChar modrm
= getUChar(delta
);
24716 UInt rG
= gregOfRexRM(pfx
, modrm
);
24717 UInt rV
= getVexNvvvv(pfx
);
24718 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24720 DIP("vmovhp%c %s,%s,%s\n", have66(pfx
) ? 'd' : 's',
24721 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
24722 IRTemp res
= newTemp(Ity_V128
);
24723 assign(res
, binop(Iop_64HLtoV128
,
24724 loadLE(Ity_I64
, mkexpr(addr
)),
24725 getXMMRegLane64(rV
, 0)));
24726 putYMMRegLoAndZU(rG
, mkexpr(res
));
24728 goto decode_success
;
24730 /* VMOVSHDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 16 /r */
24731 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*128*/) {
24732 delta
= dis_MOVSxDUP_128( vbi
, pfx
, delta
, True
/*isAvx*/,
24734 goto decode_success
;
24736 /* VMOVSHDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 16 /r */
24737 if (haveF3no66noF2(pfx
) && 1==getVexL(pfx
)/*256*/) {
24738 delta
= dis_MOVSxDUP_256( vbi
, pfx
, delta
, False
/*!isL*/ );
24739 goto decode_success
;
24744 /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */
24745 /* Insn exists only in mem form, it appears. */
24746 /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */
24747 /* Insn exists only in mem form, it appears. */
24748 if ((have66noF2noF3(pfx
) || haveNo66noF2noF3(pfx
))
24749 && 0==getVexL(pfx
)/*128*/ && !epartIsReg(getUChar(delta
))) {
24750 UChar modrm
= getUChar(delta
);
24751 UInt rG
= gregOfRexRM(pfx
, modrm
);
24752 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24754 storeLE( mkexpr(addr
), getXMMRegLane64( rG
, 1));
24755 DIP("vmovhp%c %s,%s\n", have66(pfx
) ? 'd' : 's',
24756 nameXMMReg(rG
), dis_buf
);
24757 goto decode_success
;
24762 /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */
24763 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24764 UChar modrm
= getUChar(delta
);
24765 UInt rG
= gregOfRexRM(pfx
, modrm
);
24766 if (epartIsReg(modrm
)) {
24767 UInt rE
= eregOfRexRM(pfx
,modrm
);
24768 putYMMRegLoAndZU( rG
, getXMMReg( rE
));
24769 DIP("vmovapd %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
24772 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24773 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
24774 putYMMRegLoAndZU( rG
, loadLE(Ity_V128
, mkexpr(addr
)) );
24775 DIP("vmovapd %s,%s\n", dis_buf
, nameXMMReg(rG
));
24778 goto decode_success
;
24780 /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */
24781 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24782 UChar modrm
= getUChar(delta
);
24783 UInt rG
= gregOfRexRM(pfx
, modrm
);
24784 if (epartIsReg(modrm
)) {
24785 UInt rE
= eregOfRexRM(pfx
,modrm
);
24786 putYMMReg( rG
, getYMMReg( rE
));
24787 DIP("vmovapd %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
24790 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24791 gen_SIGNAL_if_not_32_aligned( vbi
, addr
);
24792 putYMMReg( rG
, loadLE(Ity_V256
, mkexpr(addr
)) );
24793 DIP("vmovapd %s,%s\n", dis_buf
, nameYMMReg(rG
));
24796 goto decode_success
;
24798 /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */
24799 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24800 UChar modrm
= getUChar(delta
);
24801 UInt rG
= gregOfRexRM(pfx
, modrm
);
24802 if (epartIsReg(modrm
)) {
24803 UInt rE
= eregOfRexRM(pfx
,modrm
);
24804 putYMMRegLoAndZU( rG
, getXMMReg( rE
));
24805 DIP("vmovaps %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
24808 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24809 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
24810 putYMMRegLoAndZU( rG
, loadLE(Ity_V128
, mkexpr(addr
)) );
24811 DIP("vmovaps %s,%s\n", dis_buf
, nameXMMReg(rG
));
24814 goto decode_success
;
24816 /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */
24817 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24818 UChar modrm
= getUChar(delta
);
24819 UInt rG
= gregOfRexRM(pfx
, modrm
);
24820 if (epartIsReg(modrm
)) {
24821 UInt rE
= eregOfRexRM(pfx
,modrm
);
24822 putYMMReg( rG
, getYMMReg( rE
));
24823 DIP("vmovaps %s,%s\n", nameYMMReg(rE
), nameYMMReg(rG
));
24826 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24827 gen_SIGNAL_if_not_32_aligned( vbi
, addr
);
24828 putYMMReg( rG
, loadLE(Ity_V256
, mkexpr(addr
)) );
24829 DIP("vmovaps %s,%s\n", dis_buf
, nameYMMReg(rG
));
24832 goto decode_success
;
24837 /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */
24838 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24839 UChar modrm
= getUChar(delta
);
24840 UInt rG
= gregOfRexRM(pfx
,modrm
);
24841 if (epartIsReg(modrm
)) {
24842 UInt rE
= eregOfRexRM(pfx
,modrm
);
24843 putYMMRegLoAndZU( rE
, getXMMReg(rG
) );
24844 DIP("vmovapd %s,%s\n", nameXMMReg(rG
), nameXMMReg(rE
));
24847 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24848 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
24849 storeLE( mkexpr(addr
), getXMMReg(rG
) );
24850 DIP("vmovapd %s,%s\n", nameXMMReg(rG
), dis_buf
);
24853 goto decode_success
;
24855 /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */
24856 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24857 UChar modrm
= getUChar(delta
);
24858 UInt rG
= gregOfRexRM(pfx
,modrm
);
24859 if (epartIsReg(modrm
)) {
24860 UInt rE
= eregOfRexRM(pfx
,modrm
);
24861 putYMMReg( rE
, getYMMReg(rG
) );
24862 DIP("vmovapd %s,%s\n", nameYMMReg(rG
), nameYMMReg(rE
));
24865 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24866 gen_SIGNAL_if_not_32_aligned( vbi
, addr
);
24867 storeLE( mkexpr(addr
), getYMMReg(rG
) );
24868 DIP("vmovapd %s,%s\n", nameYMMReg(rG
), dis_buf
);
24871 goto decode_success
;
24873 /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */
24874 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
24875 UChar modrm
= getUChar(delta
);
24876 UInt rG
= gregOfRexRM(pfx
,modrm
);
24877 if (epartIsReg(modrm
)) {
24878 UInt rE
= eregOfRexRM(pfx
,modrm
);
24879 putYMMRegLoAndZU( rE
, getXMMReg(rG
) );
24880 DIP("vmovaps %s,%s\n", nameXMMReg(rG
), nameXMMReg(rE
));
24882 goto decode_success
;
24884 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24885 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
24886 storeLE( mkexpr(addr
), getXMMReg(rG
) );
24887 DIP("vmovaps %s,%s\n", nameXMMReg(rG
), dis_buf
);
24889 goto decode_success
;
24892 /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */
24893 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
24894 UChar modrm
= getUChar(delta
);
24895 UInt rG
= gregOfRexRM(pfx
,modrm
);
24896 if (epartIsReg(modrm
)) {
24897 UInt rE
= eregOfRexRM(pfx
,modrm
);
24898 putYMMReg( rE
, getYMMReg(rG
) );
24899 DIP("vmovaps %s,%s\n", nameYMMReg(rG
), nameYMMReg(rE
));
24901 goto decode_success
;
24903 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24904 gen_SIGNAL_if_not_32_aligned( vbi
, addr
);
24905 storeLE( mkexpr(addr
), getYMMReg(rG
) );
24906 DIP("vmovaps %s,%s\n", nameYMMReg(rG
), dis_buf
);
24908 goto decode_success
;
24914 IRTemp rmode
= newTemp(Ity_I32
);
24915 assign( rmode
, get_sse_roundingmode() );
24916 /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */
24917 if (haveF2no66noF3(pfx
) && 0==getRexW(pfx
)/*W0*/) {
24918 UChar modrm
= getUChar(delta
);
24919 UInt rV
= getVexNvvvv(pfx
);
24920 UInt rD
= gregOfRexRM(pfx
, modrm
);
24921 IRTemp arg32
= newTemp(Ity_I32
);
24922 if (epartIsReg(modrm
)) {
24923 UInt rS
= eregOfRexRM(pfx
,modrm
);
24924 assign( arg32
, getIReg32(rS
) );
24926 DIP("vcvtsi2sdl %s,%s,%s\n",
24927 nameIReg32(rS
), nameXMMReg(rV
), nameXMMReg(rD
));
24929 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24930 assign( arg32
, loadLE(Ity_I32
, mkexpr(addr
)) );
24932 DIP("vcvtsi2sdl %s,%s,%s\n",
24933 dis_buf
, nameXMMReg(rV
), nameXMMReg(rD
));
24935 putXMMRegLane64F( rD
, 0,
24936 unop(Iop_I32StoF64
, mkexpr(arg32
)));
24937 putXMMRegLane64( rD
, 1, getXMMRegLane64( rV
, 1 ));
24938 putYMMRegLane128( rD
, 1, mkV128(0) );
24940 goto decode_success
;
24942 /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */
24943 if (haveF2no66noF3(pfx
) && 1==getRexW(pfx
)/*W1*/) {
24944 UChar modrm
= getUChar(delta
);
24945 UInt rV
= getVexNvvvv(pfx
);
24946 UInt rD
= gregOfRexRM(pfx
, modrm
);
24947 IRTemp arg64
= newTemp(Ity_I64
);
24948 if (epartIsReg(modrm
)) {
24949 UInt rS
= eregOfRexRM(pfx
,modrm
);
24950 assign( arg64
, getIReg64(rS
) );
24952 DIP("vcvtsi2sdq %s,%s,%s\n",
24953 nameIReg64(rS
), nameXMMReg(rV
), nameXMMReg(rD
));
24955 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24956 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
24958 DIP("vcvtsi2sdq %s,%s,%s\n",
24959 dis_buf
, nameXMMReg(rV
), nameXMMReg(rD
));
24961 putXMMRegLane64F( rD
, 0,
24962 binop( Iop_I64StoF64
,
24963 get_sse_roundingmode(),
24965 putXMMRegLane64( rD
, 1, getXMMRegLane64( rV
, 1 ));
24966 putYMMRegLane128( rD
, 1, mkV128(0) );
24968 goto decode_success
;
24970 /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */
24971 if (haveF3no66noF2(pfx
) && 1==getRexW(pfx
)/*W1*/) {
24972 UChar modrm
= getUChar(delta
);
24973 UInt rV
= getVexNvvvv(pfx
);
24974 UInt rD
= gregOfRexRM(pfx
, modrm
);
24975 IRTemp arg64
= newTemp(Ity_I64
);
24976 if (epartIsReg(modrm
)) {
24977 UInt rS
= eregOfRexRM(pfx
,modrm
);
24978 assign( arg64
, getIReg64(rS
) );
24980 DIP("vcvtsi2ssq %s,%s,%s\n",
24981 nameIReg64(rS
), nameXMMReg(rV
), nameXMMReg(rD
));
24983 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
24984 assign( arg64
, loadLE(Ity_I64
, mkexpr(addr
)) );
24986 DIP("vcvtsi2ssq %s,%s,%s\n",
24987 dis_buf
, nameXMMReg(rV
), nameXMMReg(rD
));
24989 putXMMRegLane32F( rD
, 0,
24990 binop(Iop_F64toF32
,
24992 binop(Iop_I64StoF64
, mkexpr(rmode
),
24993 mkexpr(arg64
)) ) );
24994 putXMMRegLane32( rD
, 1, getXMMRegLane32( rV
, 1 ));
24995 putXMMRegLane64( rD
, 1, getXMMRegLane64( rV
, 1 ));
24996 putYMMRegLane128( rD
, 1, mkV128(0) );
24998 goto decode_success
;
25000 /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */
25001 if (haveF3no66noF2(pfx
) && 0==getRexW(pfx
)/*W0*/) {
25002 UChar modrm
= getUChar(delta
);
25003 UInt rV
= getVexNvvvv(pfx
);
25004 UInt rD
= gregOfRexRM(pfx
, modrm
);
25005 IRTemp arg32
= newTemp(Ity_I32
);
25006 if (epartIsReg(modrm
)) {
25007 UInt rS
= eregOfRexRM(pfx
,modrm
);
25008 assign( arg32
, getIReg32(rS
) );
25010 DIP("vcvtsi2ssl %s,%s,%s\n",
25011 nameIReg32(rS
), nameXMMReg(rV
), nameXMMReg(rD
));
25013 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
25014 assign( arg32
, loadLE(Ity_I32
, mkexpr(addr
)) );
25016 DIP("vcvtsi2ssl %s,%s,%s\n",
25017 dis_buf
, nameXMMReg(rV
), nameXMMReg(rD
));
25019 putXMMRegLane32F( rD
, 0,
25020 binop(Iop_F64toF32
,
25022 unop(Iop_I32StoF64
, mkexpr(arg32
)) ) );
25023 putXMMRegLane32( rD
, 1, getXMMRegLane32( rV
, 1 ));
25024 putXMMRegLane64( rD
, 1, getXMMRegLane64( rV
, 1 ));
25025 putYMMRegLane128( rD
, 1, mkV128(0) );
25027 goto decode_success
;
25033 /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */
25034 /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */
25035 if ((have66noF2noF3(pfx
) || haveNo66noF2noF3(pfx
))
25036 && 0==getVexL(pfx
)/*128*/ && !epartIsReg(getUChar(delta
))) {
25037 UChar modrm
= getUChar(delta
);
25038 UInt rS
= gregOfRexRM(pfx
, modrm
);
25039 IRTemp tS
= newTemp(Ity_V128
);
25040 assign(tS
, getXMMReg(rS
));
25041 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
25043 gen_SIGNAL_if_not_16_aligned(vbi
, addr
);
25044 storeLE(mkexpr(addr
), mkexpr(tS
));
25045 DIP("vmovntp%c %s,%s\n", have66(pfx
) ? 'd' : 's',
25046 nameXMMReg(rS
), dis_buf
);
25047 goto decode_success
;
25049 /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */
25050 /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */
25051 if ((have66noF2noF3(pfx
) || haveNo66noF2noF3(pfx
))
25052 && 1==getVexL(pfx
)/*256*/ && !epartIsReg(getUChar(delta
))) {
25053 UChar modrm
= getUChar(delta
);
25054 UInt rS
= gregOfRexRM(pfx
, modrm
);
25055 IRTemp tS
= newTemp(Ity_V256
);
25056 assign(tS
, getYMMReg(rS
));
25057 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
25059 gen_SIGNAL_if_not_32_aligned(vbi
, addr
);
25060 storeLE(mkexpr(addr
), mkexpr(tS
));
25061 DIP("vmovntp%c %s,%s\n", have66(pfx
) ? 'd' : 's',
25062 nameYMMReg(rS
), dis_buf
);
25063 goto decode_success
;
25068 /* VCVTTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2C /r */
25069 if (haveF2no66noF3(pfx
) && 0==getRexW(pfx
)/*W0*/) {
25070 delta
= dis_CVTxSD2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 4);
25071 goto decode_success
;
25073 /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */
25074 if (haveF2no66noF3(pfx
) && 1==getRexW(pfx
)/*W1*/) {
25075 delta
= dis_CVTxSD2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 8);
25076 goto decode_success
;
25078 /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */
25079 if (haveF3no66noF2(pfx
) && 0==getRexW(pfx
)/*W0*/) {
25080 delta
= dis_CVTxSS2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 4);
25081 goto decode_success
;
25083 /* VCVTTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2C /r */
25084 if (haveF3no66noF2(pfx
) && 1==getRexW(pfx
)/*W1*/) {
25085 delta
= dis_CVTxSS2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 8);
25086 goto decode_success
;
25091 /* VCVTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2D /r */
25092 if (haveF2no66noF3(pfx
) && 0==getRexW(pfx
)/*W0*/) {
25093 delta
= dis_CVTxSD2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 4);
25094 goto decode_success
;
25096 /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */
25097 if (haveF2no66noF3(pfx
) && 1==getRexW(pfx
)/*W1*/) {
25098 delta
= dis_CVTxSD2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 8);
25099 goto decode_success
;
25101 /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */
25102 if (haveF3no66noF2(pfx
) && 0==getRexW(pfx
)/*W0*/) {
25103 delta
= dis_CVTxSS2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 4);
25104 goto decode_success
;
25106 /* VCVTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2D /r */
25107 if (haveF3no66noF2(pfx
) && 1==getRexW(pfx
)/*W1*/) {
25108 delta
= dis_CVTxSS2SI( vbi
, pfx
, delta
, True
/*isAvx*/, opc
, 8);
25109 goto decode_success
;
25115 /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */
25116 /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */
25117 if (have66noF2noF3(pfx
)) {
25118 delta
= dis_COMISD( vbi
, pfx
, delta
, True
/*isAvx*/, opc
);
25119 goto decode_success
;
25121 /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */
25122 /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */
25123 if (haveNo66noF2noF3(pfx
)) {
25124 delta
= dis_COMISS( vbi
, pfx
, delta
, True
/*isAvx*/, opc
);
25125 goto decode_success
;
25130 /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */
25131 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25132 delta
= dis_MOVMSKPD_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
25133 goto decode_success
;
25135 /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */
25136 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25137 delta
= dis_MOVMSKPD_256( vbi
, pfx
, delta
);
25138 goto decode_success
;
25140 /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */
25141 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25142 delta
= dis_MOVMSKPS_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
25143 goto decode_success
;
25145 /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */
25146 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25147 delta
= dis_MOVMSKPS_256( vbi
, pfx
, delta
);
25148 goto decode_success
;
25153 /* VSQRTSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */
25154 if (haveF3no66noF2(pfx
)) {
25155 delta
= dis_AVX128_E_V_to_G_lo32_unary(
25156 uses_vvvv
, vbi
, pfx
, delta
, "vsqrtss", Iop_Sqrt32F0x4
);
25157 goto decode_success
;
25159 /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 51 /r */
25160 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25161 delta
= dis_AVX128_E_to_G_unary_all(
25162 uses_vvvv
, vbi
, pfx
, delta
, "vsqrtps", Iop_Sqrt32Fx4
);
25163 goto decode_success
;
25165 /* VSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 51 /r */
25166 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25167 delta
= dis_AVX256_E_to_G_unary_all(
25168 uses_vvvv
, vbi
, pfx
, delta
, "vsqrtps", Iop_Sqrt32Fx8
);
25169 goto decode_success
;
25171 /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */
25172 if (haveF2no66noF3(pfx
)) {
25173 delta
= dis_AVX128_E_V_to_G_lo64_unary(
25174 uses_vvvv
, vbi
, pfx
, delta
, "vsqrtsd", Iop_Sqrt64F0x2
);
25175 goto decode_success
;
25177 /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.NDS.128.66.0F.WIG 51 /r */
25178 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25179 delta
= dis_AVX128_E_to_G_unary_all(
25180 uses_vvvv
, vbi
, pfx
, delta
, "vsqrtpd", Iop_Sqrt64Fx2
);
25181 goto decode_success
;
25183 /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.NDS.256.66.0F.WIG 51 /r */
25184 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25185 delta
= dis_AVX256_E_to_G_unary_all(
25186 uses_vvvv
, vbi
, pfx
, delta
, "vsqrtpd", Iop_Sqrt64Fx4
);
25187 goto decode_success
;
25192 /* VRSQRTSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */
25193 if (haveF3no66noF2(pfx
)) {
25194 delta
= dis_AVX128_E_V_to_G_lo32_unary(
25195 uses_vvvv
, vbi
, pfx
, delta
, "vrsqrtss",
25196 Iop_RSqrtEst32F0x4
);
25197 goto decode_success
;
25199 /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 52 /r */
25200 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25201 delta
= dis_AVX128_E_to_G_unary_all(
25202 uses_vvvv
, vbi
, pfx
, delta
, "vrsqrtps", Iop_RSqrtEst32Fx4
);
25203 goto decode_success
;
25205 /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 52 /r */
25206 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25207 delta
= dis_AVX256_E_to_G_unary_all(
25208 uses_vvvv
, vbi
, pfx
, delta
, "vrsqrtps", Iop_RSqrtEst32Fx8
);
25209 goto decode_success
;
25214 /* VRCPSS xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */
25215 if (haveF3no66noF2(pfx
)) {
25216 delta
= dis_AVX128_E_V_to_G_lo32_unary(
25217 uses_vvvv
, vbi
, pfx
, delta
, "vrcpss", Iop_RecipEst32F0x4
);
25218 goto decode_success
;
25220 /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */
25221 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25222 delta
= dis_AVX128_E_to_G_unary_all(
25223 uses_vvvv
, vbi
, pfx
, delta
, "vrcpps", Iop_RecipEst32Fx4
);
25224 goto decode_success
;
25226 /* VRCPPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */
25227 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25228 delta
= dis_AVX256_E_to_G_unary_all(
25229 uses_vvvv
, vbi
, pfx
, delta
, "vrcpps", Iop_RecipEst32Fx8
);
25230 goto decode_success
;
25235 /* VANDPD r/m, rV, r ::: r = rV & r/m */
25236 /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */
25237 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25238 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25239 uses_vvvv
, vbi
, pfx
, delta
, "vandpd", Iop_AndV128
);
25240 goto decode_success
;
25242 /* VANDPD r/m, rV, r ::: r = rV & r/m */
25243 /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */
25244 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25245 delta
= dis_AVX256_E_V_to_G(
25246 uses_vvvv
, vbi
, pfx
, delta
, "vandpd", Iop_AndV256
);
25247 goto decode_success
;
25249 /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */
25250 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25251 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25252 uses_vvvv
, vbi
, pfx
, delta
, "vandps", Iop_AndV128
);
25253 goto decode_success
;
25255 /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */
25256 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25257 delta
= dis_AVX256_E_V_to_G(
25258 uses_vvvv
, vbi
, pfx
, delta
, "vandps", Iop_AndV256
);
25259 goto decode_success
;
25264 /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */
25265 /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */
25266 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25267 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25268 uses_vvvv
, vbi
, pfx
, delta
, "vandpd", Iop_AndV128
,
25269 NULL
, True
/*invertLeftArg*/, False
/*swapArgs*/ );
25270 goto decode_success
;
25272 /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */
25273 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25274 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
25275 uses_vvvv
, vbi
, pfx
, delta
, "vandpd", Iop_AndV256
,
25276 NULL
, True
/*invertLeftArg*/, False
/*swapArgs*/ );
25277 goto decode_success
;
25279 /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */
25280 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25281 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25282 uses_vvvv
, vbi
, pfx
, delta
, "vandps", Iop_AndV128
,
25283 NULL
, True
/*invertLeftArg*/, False
/*swapArgs*/ );
25284 goto decode_success
;
25286 /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */
25287 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25288 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
25289 uses_vvvv
, vbi
, pfx
, delta
, "vandps", Iop_AndV256
,
25290 NULL
, True
/*invertLeftArg*/, False
/*swapArgs*/ );
25291 goto decode_success
;
25296 /* VORPD r/m, rV, r ::: r = rV | r/m */
25297 /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */
25298 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25299 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25300 uses_vvvv
, vbi
, pfx
, delta
, "vorpd", Iop_OrV128
);
25301 goto decode_success
;
25303 /* VORPD r/m, rV, r ::: r = rV | r/m */
25304 /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */
25305 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25306 delta
= dis_AVX256_E_V_to_G(
25307 uses_vvvv
, vbi
, pfx
, delta
, "vorpd", Iop_OrV256
);
25308 goto decode_success
;
25310 /* VORPS r/m, rV, r ::: r = rV | r/m */
25311 /* VORPS = VEX.NDS.128.0F.WIG 56 /r */
25312 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25313 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25314 uses_vvvv
, vbi
, pfx
, delta
, "vorps", Iop_OrV128
);
25315 goto decode_success
;
25317 /* VORPS r/m, rV, r ::: r = rV | r/m */
25318 /* VORPS = VEX.NDS.256.0F.WIG 56 /r */
25319 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25320 delta
= dis_AVX256_E_V_to_G(
25321 uses_vvvv
, vbi
, pfx
, delta
, "vorps", Iop_OrV256
);
25322 goto decode_success
;
25327 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
25328 /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */
25329 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25330 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25331 uses_vvvv
, vbi
, pfx
, delta
, "vxorpd", Iop_XorV128
);
25332 goto decode_success
;
25334 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
25335 /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */
25336 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25337 delta
= dis_AVX256_E_V_to_G(
25338 uses_vvvv
, vbi
, pfx
, delta
, "vxorpd", Iop_XorV256
);
25339 goto decode_success
;
25341 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
25342 /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */
25343 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25344 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25345 uses_vvvv
, vbi
, pfx
, delta
, "vxorps", Iop_XorV128
);
25346 goto decode_success
;
25348 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
25349 /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */
25350 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25351 delta
= dis_AVX256_E_V_to_G(
25352 uses_vvvv
, vbi
, pfx
, delta
, "vxorps", Iop_XorV256
);
25353 goto decode_success
;
25358 /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */
25359 if (haveF2no66noF3(pfx
)) {
25360 delta
= dis_AVX128_E_V_to_G_lo64(
25361 uses_vvvv
, vbi
, pfx
, delta
, "vaddsd", Iop_Add64F0x2
);
25362 goto decode_success
;
25364 /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */
25365 if (haveF3no66noF2(pfx
)) {
25366 delta
= dis_AVX128_E_V_to_G_lo32(
25367 uses_vvvv
, vbi
, pfx
, delta
, "vaddss", Iop_Add32F0x4
);
25368 goto decode_success
;
25370 /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */
25371 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25372 delta
= dis_AVX128_E_V_to_G(
25373 uses_vvvv
, vbi
, pfx
, delta
, "vaddps", Iop_Add32Fx4
);
25374 goto decode_success
;
25376 /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */
25377 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25378 delta
= dis_AVX256_E_V_to_G(
25379 uses_vvvv
, vbi
, pfx
, delta
, "vaddps", Iop_Add32Fx8
);
25380 goto decode_success
;
25382 /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */
25383 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25384 delta
= dis_AVX128_E_V_to_G(
25385 uses_vvvv
, vbi
, pfx
, delta
, "vaddpd", Iop_Add64Fx2
);
25386 goto decode_success
;
25388 /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */
25389 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25390 delta
= dis_AVX256_E_V_to_G(
25391 uses_vvvv
, vbi
, pfx
, delta
, "vaddpd", Iop_Add64Fx4
);
25392 goto decode_success
;
25397 /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */
25398 if (haveF2no66noF3(pfx
)) {
25399 delta
= dis_AVX128_E_V_to_G_lo64(
25400 uses_vvvv
, vbi
, pfx
, delta
, "vmulsd", Iop_Mul64F0x2
);
25401 goto decode_success
;
25403 /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */
25404 if (haveF3no66noF2(pfx
)) {
25405 delta
= dis_AVX128_E_V_to_G_lo32(
25406 uses_vvvv
, vbi
, pfx
, delta
, "vmulss", Iop_Mul32F0x4
);
25407 goto decode_success
;
25409 /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */
25410 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25411 delta
= dis_AVX128_E_V_to_G(
25412 uses_vvvv
, vbi
, pfx
, delta
, "vmulps", Iop_Mul32Fx4
);
25413 goto decode_success
;
25415 /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */
25416 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25417 delta
= dis_AVX256_E_V_to_G(
25418 uses_vvvv
, vbi
, pfx
, delta
, "vmulps", Iop_Mul32Fx8
);
25419 goto decode_success
;
25421 /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */
25422 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25423 delta
= dis_AVX128_E_V_to_G(
25424 uses_vvvv
, vbi
, pfx
, delta
, "vmulpd", Iop_Mul64Fx2
);
25425 goto decode_success
;
25427 /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */
25428 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25429 delta
= dis_AVX256_E_V_to_G(
25430 uses_vvvv
, vbi
, pfx
, delta
, "vmulpd", Iop_Mul64Fx4
);
25431 goto decode_success
;
25436 /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */
25437 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25438 delta
= dis_CVTPS2PD_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
25439 goto decode_success
;
25441 /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */
25442 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25443 delta
= dis_CVTPS2PD_256( vbi
, pfx
, delta
);
25444 goto decode_success
;
25446 /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */
25447 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25448 delta
= dis_CVTPD2PS_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
25449 goto decode_success
;
25451 /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */
25452 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25453 delta
= dis_CVTPD2PS_256( vbi
, pfx
, delta
);
25454 goto decode_success
;
25456 /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */
25457 if (haveF2no66noF3(pfx
)) {
25458 UChar modrm
= getUChar(delta
);
25459 UInt rV
= getVexNvvvv(pfx
);
25460 UInt rD
= gregOfRexRM(pfx
, modrm
);
25461 IRTemp f64lo
= newTemp(Ity_F64
);
25462 IRTemp rmode
= newTemp(Ity_I32
);
25463 assign( rmode
, get_sse_roundingmode() );
25464 if (epartIsReg(modrm
)) {
25465 UInt rS
= eregOfRexRM(pfx
,modrm
);
25466 assign(f64lo
, getXMMRegLane64F(rS
, 0));
25468 DIP("vcvtsd2ss %s,%s,%s\n",
25469 nameXMMReg(rS
), nameXMMReg(rV
), nameXMMReg(rD
));
25471 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
25472 assign(f64lo
, loadLE(Ity_F64
, mkexpr(addr
)) );
25474 DIP("vcvtsd2ss %s,%s,%s\n",
25475 dis_buf
, nameXMMReg(rV
), nameXMMReg(rD
));
25477 putXMMRegLane32F( rD
, 0,
25478 binop( Iop_F64toF32
, mkexpr(rmode
),
25480 putXMMRegLane32( rD
, 1, getXMMRegLane32( rV
, 1 ));
25481 putXMMRegLane64( rD
, 1, getXMMRegLane64( rV
, 1 ));
25482 putYMMRegLane128( rD
, 1, mkV128(0) );
25484 goto decode_success
;
25486 /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */
25487 if (haveF3no66noF2(pfx
)) {
25488 UChar modrm
= getUChar(delta
);
25489 UInt rV
= getVexNvvvv(pfx
);
25490 UInt rD
= gregOfRexRM(pfx
, modrm
);
25491 IRTemp f32lo
= newTemp(Ity_F32
);
25492 if (epartIsReg(modrm
)) {
25493 UInt rS
= eregOfRexRM(pfx
,modrm
);
25494 assign(f32lo
, getXMMRegLane32F(rS
, 0));
25496 DIP("vcvtss2sd %s,%s,%s\n",
25497 nameXMMReg(rS
), nameXMMReg(rV
), nameXMMReg(rD
));
25499 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
25500 assign(f32lo
, loadLE(Ity_F32
, mkexpr(addr
)) );
25502 DIP("vcvtss2sd %s,%s,%s\n",
25503 dis_buf
, nameXMMReg(rV
), nameXMMReg(rD
));
25505 putXMMRegLane64F( rD
, 0,
25506 unop( Iop_F32toF64
, mkexpr(f32lo
)) );
25507 putXMMRegLane64( rD
, 1, getXMMRegLane64( rV
, 1 ));
25508 putYMMRegLane128( rD
, 1, mkV128(0) );
25510 goto decode_success
;
25515 /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */
25516 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25517 delta
= dis_CVTxPS2DQ_128( vbi
, pfx
, delta
,
25518 True
/*isAvx*/, False
/*!r2zero*/ );
25519 goto decode_success
;
25521 /* VCVTPS2DQ ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */
25522 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25523 delta
= dis_CVTxPS2DQ_256( vbi
, pfx
, delta
,
25524 False
/*!r2zero*/ );
25525 goto decode_success
;
25527 /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */
25528 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*128*/) {
25529 delta
= dis_CVTxPS2DQ_128( vbi
, pfx
, delta
,
25530 True
/*isAvx*/, True
/*r2zero*/ );
25531 goto decode_success
;
25533 /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */
25534 if (haveF3no66noF2(pfx
) && 1==getVexL(pfx
)/*256*/) {
25535 delta
= dis_CVTxPS2DQ_256( vbi
, pfx
, delta
,
25537 goto decode_success
;
25539 /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */
25540 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25541 delta
= dis_CVTDQ2PS_128 ( vbi
, pfx
, delta
, True
/*isAvx*/ );
25542 goto decode_success
;
25544 /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */
25545 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25546 delta
= dis_CVTDQ2PS_256 ( vbi
, pfx
, delta
);
25547 goto decode_success
;
25552 /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */
25553 if (haveF2no66noF3(pfx
)) {
25554 delta
= dis_AVX128_E_V_to_G_lo64(
25555 uses_vvvv
, vbi
, pfx
, delta
, "vsubsd", Iop_Sub64F0x2
);
25556 goto decode_success
;
25558 /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */
25559 if (haveF3no66noF2(pfx
)) {
25560 delta
= dis_AVX128_E_V_to_G_lo32(
25561 uses_vvvv
, vbi
, pfx
, delta
, "vsubss", Iop_Sub32F0x4
);
25562 goto decode_success
;
25564 /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */
25565 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25566 delta
= dis_AVX128_E_V_to_G(
25567 uses_vvvv
, vbi
, pfx
, delta
, "vsubps", Iop_Sub32Fx4
);
25568 goto decode_success
;
25570 /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */
25571 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25572 delta
= dis_AVX256_E_V_to_G(
25573 uses_vvvv
, vbi
, pfx
, delta
, "vsubps", Iop_Sub32Fx8
);
25574 goto decode_success
;
25576 /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */
25577 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25578 delta
= dis_AVX128_E_V_to_G(
25579 uses_vvvv
, vbi
, pfx
, delta
, "vsubpd", Iop_Sub64Fx2
);
25580 goto decode_success
;
25582 /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */
25583 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25584 delta
= dis_AVX256_E_V_to_G(
25585 uses_vvvv
, vbi
, pfx
, delta
, "vsubpd", Iop_Sub64Fx4
);
25586 goto decode_success
;
25591 /* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */
25592 if (haveF2no66noF3(pfx
)) {
25593 delta
= dis_AVX128_E_V_to_G_lo64(
25594 uses_vvvv
, vbi
, pfx
, delta
, "vminsd", Iop_Min64F0x2
);
25595 goto decode_success
;
25597 /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */
25598 if (haveF3no66noF2(pfx
)) {
25599 delta
= dis_AVX128_E_V_to_G_lo32(
25600 uses_vvvv
, vbi
, pfx
, delta
, "vminss", Iop_Min32F0x4
);
25601 goto decode_success
;
25603 /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */
25604 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25605 delta
= dis_AVX128_E_V_to_G(
25606 uses_vvvv
, vbi
, pfx
, delta
, "vminps", Iop_Min32Fx4
);
25607 goto decode_success
;
25609 /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */
25610 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25611 delta
= dis_AVX256_E_V_to_G(
25612 uses_vvvv
, vbi
, pfx
, delta
, "vminps", Iop_Min32Fx8
);
25613 goto decode_success
;
25615 /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */
25616 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25617 delta
= dis_AVX128_E_V_to_G(
25618 uses_vvvv
, vbi
, pfx
, delta
, "vminpd", Iop_Min64Fx2
);
25619 goto decode_success
;
25621 /* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */
25622 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25623 delta
= dis_AVX256_E_V_to_G(
25624 uses_vvvv
, vbi
, pfx
, delta
, "vminpd", Iop_Min64Fx4
);
25625 goto decode_success
;
25630 /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */
25631 if (haveF2no66noF3(pfx
)) {
25632 delta
= dis_AVX128_E_V_to_G_lo64(
25633 uses_vvvv
, vbi
, pfx
, delta
, "vdivsd", Iop_Div64F0x2
);
25634 goto decode_success
;
25636 /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */
25637 if (haveF3no66noF2(pfx
)) {
25638 delta
= dis_AVX128_E_V_to_G_lo32(
25639 uses_vvvv
, vbi
, pfx
, delta
, "vdivss", Iop_Div32F0x4
);
25640 goto decode_success
;
25642 /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */
25643 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25644 delta
= dis_AVX128_E_V_to_G(
25645 uses_vvvv
, vbi
, pfx
, delta
, "vdivps", Iop_Div32Fx4
);
25646 goto decode_success
;
25648 /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */
25649 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25650 delta
= dis_AVX256_E_V_to_G(
25651 uses_vvvv
, vbi
, pfx
, delta
, "vdivps", Iop_Div32Fx8
);
25652 goto decode_success
;
25654 /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */
25655 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25656 delta
= dis_AVX128_E_V_to_G(
25657 uses_vvvv
, vbi
, pfx
, delta
, "vdivpd", Iop_Div64Fx2
);
25658 goto decode_success
;
25660 /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */
25661 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25662 delta
= dis_AVX256_E_V_to_G(
25663 uses_vvvv
, vbi
, pfx
, delta
, "vdivpd", Iop_Div64Fx4
);
25664 goto decode_success
;
25669 /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */
25670 if (haveF2no66noF3(pfx
)) {
25671 delta
= dis_AVX128_E_V_to_G_lo64(
25672 uses_vvvv
, vbi
, pfx
, delta
, "vmaxsd", Iop_Max64F0x2
);
25673 goto decode_success
;
25675 /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */
25676 if (haveF3no66noF2(pfx
)) {
25677 delta
= dis_AVX128_E_V_to_G_lo32(
25678 uses_vvvv
, vbi
, pfx
, delta
, "vmaxss", Iop_Max32F0x4
);
25679 goto decode_success
;
25681 /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */
25682 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25683 delta
= dis_AVX128_E_V_to_G(
25684 uses_vvvv
, vbi
, pfx
, delta
, "vmaxps", Iop_Max32Fx4
);
25685 goto decode_success
;
25687 /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */
25688 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25689 delta
= dis_AVX256_E_V_to_G(
25690 uses_vvvv
, vbi
, pfx
, delta
, "vmaxps", Iop_Max32Fx8
);
25691 goto decode_success
;
25693 /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */
25694 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25695 delta
= dis_AVX128_E_V_to_G(
25696 uses_vvvv
, vbi
, pfx
, delta
, "vmaxpd", Iop_Max64Fx2
);
25697 goto decode_success
;
25699 /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */
25700 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25701 delta
= dis_AVX256_E_V_to_G(
25702 uses_vvvv
, vbi
, pfx
, delta
, "vmaxpd", Iop_Max64Fx4
);
25703 goto decode_success
;
25708 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
25709 /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */
25710 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25711 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25712 uses_vvvv
, vbi
, pfx
, delta
, "vpunpcklbw",
25713 Iop_InterleaveLO8x16
, NULL
,
25714 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25715 goto decode_success
;
25717 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
25718 /* VPUNPCKLBW = VEX.NDS.256.66.0F.WIG 60 /r */
25719 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25720 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25721 uses_vvvv
, vbi
, pfx
, delta
, "vpunpcklbw",
25722 math_VPUNPCKLBW_YMM
);
25723 goto decode_success
;
25728 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
25729 /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */
25730 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25731 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25732 uses_vvvv
, vbi
, pfx
, delta
, "vpunpcklwd",
25733 Iop_InterleaveLO16x8
, NULL
,
25734 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25735 goto decode_success
;
25737 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
25738 /* VPUNPCKLWD = VEX.NDS.256.66.0F.WIG 61 /r */
25739 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25740 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25741 uses_vvvv
, vbi
, pfx
, delta
, "vpunpcklwd",
25742 math_VPUNPCKLWD_YMM
);
25743 goto decode_success
;
25748 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
25749 /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */
25750 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25751 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25752 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckldq",
25753 Iop_InterleaveLO32x4
, NULL
,
25754 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25755 goto decode_success
;
25757 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
25758 /* VPUNPCKLDQ = VEX.NDS.256.66.0F.WIG 62 /r */
25759 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25760 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25761 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckldq",
25762 math_VPUNPCKLDQ_YMM
);
25763 goto decode_success
;
25768 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
25769 /* VPACKSSWB = VEX.NDS.128.66.0F.WIG 63 /r */
25770 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25771 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25772 uses_vvvv
, vbi
, pfx
, delta
, "vpacksswb",
25773 Iop_QNarrowBin16Sto8Sx16
, NULL
,
25774 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25775 goto decode_success
;
25777 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
25778 /* VPACKSSWB = VEX.NDS.256.66.0F.WIG 63 /r */
25779 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25780 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25781 uses_vvvv
, vbi
, pfx
, delta
, "vpacksswb",
25782 math_VPACKSSWB_YMM
);
25783 goto decode_success
;
25788 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
25789 /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */
25790 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25791 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25792 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtb", Iop_CmpGT8Sx16
);
25793 goto decode_success
;
25795 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
25796 /* VPCMPGTB = VEX.NDS.256.66.0F.WIG 64 /r */
25797 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25798 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25799 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtb", Iop_CmpGT8Sx32
);
25800 goto decode_success
;
25805 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25806 /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */
25807 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25808 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25809 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtw", Iop_CmpGT16Sx8
);
25810 goto decode_success
;
25812 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25813 /* VPCMPGTW = VEX.NDS.256.66.0F.WIG 65 /r */
25814 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25815 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25816 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtw", Iop_CmpGT16Sx16
);
25817 goto decode_success
;
25822 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25823 /* VPCMPGTD = VEX.NDS.128.66.0F.WIG 66 /r */
25824 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25825 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25826 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtd", Iop_CmpGT32Sx4
);
25827 goto decode_success
;
25829 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25830 /* VPCMPGTD = VEX.NDS.256.66.0F.WIG 66 /r */
25831 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25832 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25833 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtd", Iop_CmpGT32Sx8
);
25834 goto decode_success
;
25839 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25840 /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */
25841 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25842 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25843 uses_vvvv
, vbi
, pfx
, delta
, "vpackuswb",
25844 Iop_QNarrowBin16Sto8Ux16
, NULL
,
25845 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25846 goto decode_success
;
25848 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25849 /* VPACKUSWB = VEX.NDS.256.66.0F.WIG 67 /r */
25850 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25851 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25852 uses_vvvv
, vbi
, pfx
, delta
, "vpackuswb",
25853 math_VPACKUSWB_YMM
);
25854 goto decode_success
;
25859 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
25860 /* VPUNPCKHBW = VEX.NDS.128.0F.WIG 68 /r */
25861 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25862 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25863 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhbw",
25864 Iop_InterleaveHI8x16
, NULL
,
25865 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25866 goto decode_success
;
25868 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
25869 /* VPUNPCKHBW = VEX.NDS.256.0F.WIG 68 /r */
25870 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25871 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25872 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhbw",
25873 math_VPUNPCKHBW_YMM
);
25874 goto decode_success
;
25879 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25880 /* VPUNPCKHWD = VEX.NDS.128.0F.WIG 69 /r */
25881 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25882 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25883 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhwd",
25884 Iop_InterleaveHI16x8
, NULL
,
25885 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25886 goto decode_success
;
25888 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25889 /* VPUNPCKHWD = VEX.NDS.256.0F.WIG 69 /r */
25890 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25891 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25892 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhwd",
25893 math_VPUNPCKHWD_YMM
);
25894 goto decode_success
;
25899 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25900 /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */
25901 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25902 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25903 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhdq",
25904 Iop_InterleaveHI32x4
, NULL
,
25905 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25906 goto decode_success
;
25908 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25909 /* VPUNPCKHDQ = VEX.NDS.256.66.0F.WIG 6A /r */
25910 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25911 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25912 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhdq",
25913 math_VPUNPCKHDQ_YMM
);
25914 goto decode_success
;
25919 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25920 /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */
25921 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25922 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25923 uses_vvvv
, vbi
, pfx
, delta
, "vpackssdw",
25924 Iop_QNarrowBin32Sto16Sx8
, NULL
,
25925 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25926 goto decode_success
;
25928 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25929 /* VPACKSSDW = VEX.NDS.256.66.0F.WIG 6B /r */
25930 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25931 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25932 uses_vvvv
, vbi
, pfx
, delta
, "vpackssdw",
25933 math_VPACKSSDW_YMM
);
25934 goto decode_success
;
25939 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25940 /* VPUNPCKLQDQ = VEX.NDS.128.0F.WIG 6C /r */
25941 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25942 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25943 uses_vvvv
, vbi
, pfx
, delta
, "vpunpcklqdq",
25944 Iop_InterleaveLO64x2
, NULL
,
25945 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25946 goto decode_success
;
25948 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25949 /* VPUNPCKLQDQ = VEX.NDS.256.0F.WIG 6C /r */
25950 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25951 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25952 uses_vvvv
, vbi
, pfx
, delta
, "vpunpcklqdq",
25953 math_VPUNPCKLQDQ_YMM
);
25954 goto decode_success
;
25959 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25960 /* VPUNPCKHQDQ = VEX.NDS.128.0F.WIG 6D /r */
25961 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
25962 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25963 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhqdq",
25964 Iop_InterleaveHI64x2
, NULL
,
25965 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
25966 goto decode_success
;
25968 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25969 /* VPUNPCKHQDQ = VEX.NDS.256.0F.WIG 6D /r */
25970 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
25971 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25972 uses_vvvv
, vbi
, pfx
, delta
, "vpunpckhqdq",
25973 math_VPUNPCKHQDQ_YMM
);
25974 goto decode_success
;
25979 /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */
25980 if (have66noF2noF3(pfx
)
25981 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
25982 vassert(sz
== 2); /* even tho we are transferring 4, not 2. */
25983 UChar modrm
= getUChar(delta
);
25984 if (epartIsReg(modrm
)) {
25987 gregOfRexRM(pfx
,modrm
),
25988 unop( Iop_32UtoV128
, getIReg32(eregOfRexRM(pfx
,modrm
)) )
25990 DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx
,modrm
)),
25991 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
25993 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
25996 gregOfRexRM(pfx
,modrm
),
25997 unop( Iop_32UtoV128
,loadLE(Ity_I32
, mkexpr(addr
)))
25999 DIP("vmovd %s, %s\n", dis_buf
,
26000 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
26002 goto decode_success
;
26004 /* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */
26005 if (have66noF2noF3(pfx
)
26006 && 0==getVexL(pfx
)/*128*/ && 1==getRexW(pfx
)/*W1*/) {
26007 vassert(sz
== 2); /* even tho we are transferring 8, not 2. */
26008 UChar modrm
= getUChar(delta
);
26009 if (epartIsReg(modrm
)) {
26012 gregOfRexRM(pfx
,modrm
),
26013 unop( Iop_64UtoV128
, getIReg64(eregOfRexRM(pfx
,modrm
)) )
26015 DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx
,modrm
)),
26016 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
26018 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26021 gregOfRexRM(pfx
,modrm
),
26022 unop( Iop_64UtoV128
,loadLE(Ity_I64
, mkexpr(addr
)))
26024 DIP("vmovq %s, %s\n", dis_buf
,
26025 nameXMMReg(gregOfRexRM(pfx
,modrm
)));
26027 goto decode_success
;
26032 /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */
26033 /* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */
26034 if ((have66noF2noF3(pfx
) || haveF3no66noF2(pfx
))
26035 && 1==getVexL(pfx
)/*256*/) {
26036 UChar modrm
= getUChar(delta
);
26037 UInt rD
= gregOfRexRM(pfx
, modrm
);
26038 IRTemp tD
= newTemp(Ity_V256
);
26039 Bool isA
= have66noF2noF3(pfx
);
26040 HChar ch
= isA
? 'a' : 'u';
26041 if (epartIsReg(modrm
)) {
26042 UInt rS
= eregOfRexRM(pfx
, modrm
);
26044 assign(tD
, getYMMReg(rS
));
26045 DIP("vmovdq%c %s,%s\n", ch
, nameYMMReg(rS
), nameYMMReg(rD
));
26047 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26050 gen_SIGNAL_if_not_32_aligned(vbi
, addr
);
26051 assign(tD
, loadLE(Ity_V256
, mkexpr(addr
)));
26052 DIP("vmovdq%c %s,%s\n", ch
, dis_buf
, nameYMMReg(rD
));
26054 putYMMReg(rD
, mkexpr(tD
));
26055 goto decode_success
;
26057 /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */
26058 /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */
26059 if ((have66noF2noF3(pfx
) || haveF3no66noF2(pfx
))
26060 && 0==getVexL(pfx
)/*128*/) {
26061 UChar modrm
= getUChar(delta
);
26062 UInt rD
= gregOfRexRM(pfx
, modrm
);
26063 IRTemp tD
= newTemp(Ity_V128
);
26064 Bool isA
= have66noF2noF3(pfx
);
26065 HChar ch
= isA
? 'a' : 'u';
26066 if (epartIsReg(modrm
)) {
26067 UInt rS
= eregOfRexRM(pfx
, modrm
);
26069 assign(tD
, getXMMReg(rS
));
26070 DIP("vmovdq%c %s,%s\n", ch
, nameXMMReg(rS
), nameXMMReg(rD
));
26072 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26075 gen_SIGNAL_if_not_16_aligned(vbi
, addr
);
26076 assign(tD
, loadLE(Ity_V128
, mkexpr(addr
)));
26077 DIP("vmovdq%c %s,%s\n", ch
, dis_buf
, nameXMMReg(rD
));
26079 putYMMRegLoAndZU(rD
, mkexpr(tD
));
26080 goto decode_success
;
26085 /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */
26086 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26087 delta
= dis_PSHUFD_32x4( vbi
, pfx
, delta
, True
/*writesYmm*/);
26088 goto decode_success
;
26090 /* VPSHUFD imm8, ymm2/m256, ymm1 = VEX.256.66.0F.WIG 70 /r ib */
26091 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26092 delta
= dis_PSHUFD_32x8( vbi
, pfx
, delta
);
26093 goto decode_success
;
26095 /* VPSHUFLW imm8, xmm2/m128, xmm1 = VEX.128.F2.0F.WIG 70 /r ib */
26096 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26097 delta
= dis_PSHUFxW_128( vbi
, pfx
, delta
,
26098 True
/*isAvx*/, False
/*!xIsH*/ );
26099 goto decode_success
;
26101 /* VPSHUFLW imm8, ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 70 /r ib */
26102 if (haveF2no66noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26103 delta
= dis_PSHUFxW_256( vbi
, pfx
, delta
, False
/*!xIsH*/ );
26104 goto decode_success
;
26106 /* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */
26107 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*128*/) {
26108 delta
= dis_PSHUFxW_128( vbi
, pfx
, delta
,
26109 True
/*isAvx*/, True
/*xIsH*/ );
26110 goto decode_success
;
26112 /* VPSHUFHW imm8, ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 70 /r ib */
26113 if (haveF3no66noF2(pfx
) && 1==getVexL(pfx
)/*256*/) {
26114 delta
= dis_PSHUFxW_256( vbi
, pfx
, delta
, True
/*xIsH*/ );
26115 goto decode_success
;
26120 /* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */
26121 /* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */
26122 /* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */
26123 if (have66noF2noF3(pfx
)
26124 && 0==getVexL(pfx
)/*128*/
26125 && epartIsReg(getUChar(delta
))) {
26126 if (gregLO3ofRM(getUChar(delta
)) == 2/*SRL*/) {
26127 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
26128 "vpsrlw", Iop_ShrN16x8
);
26130 goto decode_success
;
26132 if (gregLO3ofRM(getUChar(delta
)) == 4/*SRA*/) {
26133 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
26134 "vpsraw", Iop_SarN16x8
);
26136 goto decode_success
;
26138 if (gregLO3ofRM(getUChar(delta
)) == 6/*SLL*/) {
26139 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
26140 "vpsllw", Iop_ShlN16x8
);
26142 goto decode_success
;
26144 /* else fall through */
26146 /* VPSRLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /2 ib */
26147 /* VPSRAW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /4 ib */
26148 /* VPSLLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /6 ib */
26149 if (have66noF2noF3(pfx
)
26150 && 1==getVexL(pfx
)/*256*/
26151 && epartIsReg(getUChar(delta
))) {
26152 if (gregLO3ofRM(getUChar(delta
)) == 2/*SRL*/) {
26153 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26154 "vpsrlw", Iop_ShrN16x16
);
26156 goto decode_success
;
26158 if (gregLO3ofRM(getUChar(delta
)) == 4/*SRA*/) {
26159 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26160 "vpsraw", Iop_SarN16x16
);
26162 goto decode_success
;
26164 if (gregLO3ofRM(getUChar(delta
)) == 6/*SLL*/) {
26165 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26166 "vpsllw", Iop_ShlN16x16
);
26168 goto decode_success
;
26170 /* else fall through */
26175 /* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */
26176 /* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */
26177 /* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */
26178 if (have66noF2noF3(pfx
)
26179 && 0==getVexL(pfx
)/*128*/
26180 && epartIsReg(getUChar(delta
))) {
26181 if (gregLO3ofRM(getUChar(delta
)) == 2/*SRL*/) {
26182 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
26183 "vpsrld", Iop_ShrN32x4
);
26185 goto decode_success
;
26187 if (gregLO3ofRM(getUChar(delta
)) == 4/*SRA*/) {
26188 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
26189 "vpsrad", Iop_SarN32x4
);
26191 goto decode_success
;
26193 if (gregLO3ofRM(getUChar(delta
)) == 6/*SLL*/) {
26194 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
26195 "vpslld", Iop_ShlN32x4
);
26197 goto decode_success
;
26199 /* else fall through */
26201 /* VPSRLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /2 ib */
26202 /* VPSRAD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /4 ib */
26203 /* VPSLLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /6 ib */
26204 if (have66noF2noF3(pfx
)
26205 && 1==getVexL(pfx
)/*256*/
26206 && epartIsReg(getUChar(delta
))) {
26207 if (gregLO3ofRM(getUChar(delta
)) == 2/*SRL*/) {
26208 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26209 "vpsrld", Iop_ShrN32x8
);
26211 goto decode_success
;
26213 if (gregLO3ofRM(getUChar(delta
)) == 4/*SRA*/) {
26214 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26215 "vpsrad", Iop_SarN32x8
);
26217 goto decode_success
;
26219 if (gregLO3ofRM(getUChar(delta
)) == 6/*SLL*/) {
26220 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26221 "vpslld", Iop_ShlN32x8
);
26223 goto decode_success
;
26225 /* else fall through */
26230 /* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */
26231 /* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */
26232 /* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */
26233 /* VPSLLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /6 ib */
26234 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
26235 && epartIsReg(getUChar(delta
))) {
26236 Int rS
= eregOfRexRM(pfx
,getUChar(delta
));
26237 Int rD
= getVexNvvvv(pfx
);
26238 IRTemp vecS
= newTemp(Ity_V128
);
26239 if (gregLO3ofRM(getUChar(delta
)) == 3) {
26240 Int imm
= (Int
)getUChar(delta
+1);
26241 DIP("vpsrldq $%d,%s,%s\n", imm
, nameXMMReg(rS
), nameXMMReg(rD
));
26243 assign( vecS
, getXMMReg(rS
) );
26244 putYMMRegLoAndZU(rD
, mkexpr(math_PSRLDQ( vecS
, imm
)));
26246 goto decode_success
;
26248 if (gregLO3ofRM(getUChar(delta
)) == 7) {
26249 Int imm
= (Int
)getUChar(delta
+1);
26250 DIP("vpslldq $%d,%s,%s\n", imm
, nameXMMReg(rS
), nameXMMReg(rD
));
26252 assign( vecS
, getXMMReg(rS
) );
26253 putYMMRegLoAndZU(rD
, mkexpr(math_PSLLDQ( vecS
, imm
)));
26255 goto decode_success
;
26257 if (gregLO3ofRM(getUChar(delta
)) == 2) {
26258 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
26259 "vpsrlq", Iop_ShrN64x2
);
26261 goto decode_success
;
26263 if (gregLO3ofRM(getUChar(delta
)) == 6) {
26264 delta
= dis_AVX128_shiftE_to_V_imm( pfx
, delta
,
26265 "vpsllq", Iop_ShlN64x2
);
26267 goto decode_success
;
26269 /* else fall through */
26271 /* VPSRLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /3 ib */
26272 /* VPSLLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /7 ib */
26273 /* VPSRLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /2 ib */
26274 /* VPSLLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /6 ib */
26275 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
26276 && epartIsReg(getUChar(delta
))) {
26277 Int rS
= eregOfRexRM(pfx
,getUChar(delta
));
26278 Int rD
= getVexNvvvv(pfx
);
26279 if (gregLO3ofRM(getUChar(delta
)) == 3) {
26280 IRTemp vecS0
= newTemp(Ity_V128
);
26281 IRTemp vecS1
= newTemp(Ity_V128
);
26282 Int imm
= (Int
)getUChar(delta
+1);
26283 DIP("vpsrldq $%d,%s,%s\n", imm
, nameYMMReg(rS
), nameYMMReg(rD
));
26285 assign( vecS0
, getYMMRegLane128(rS
, 0));
26286 assign( vecS1
, getYMMRegLane128(rS
, 1));
26287 putYMMRegLane128(rD
, 0, mkexpr(math_PSRLDQ( vecS0
, imm
)));
26288 putYMMRegLane128(rD
, 1, mkexpr(math_PSRLDQ( vecS1
, imm
)));
26290 goto decode_success
;
26292 if (gregLO3ofRM(getUChar(delta
)) == 7) {
26293 IRTemp vecS0
= newTemp(Ity_V128
);
26294 IRTemp vecS1
= newTemp(Ity_V128
);
26295 Int imm
= (Int
)getUChar(delta
+1);
26296 DIP("vpslldq $%d,%s,%s\n", imm
, nameYMMReg(rS
), nameYMMReg(rD
));
26298 assign( vecS0
, getYMMRegLane128(rS
, 0));
26299 assign( vecS1
, getYMMRegLane128(rS
, 1));
26300 putYMMRegLane128(rD
, 0, mkexpr(math_PSLLDQ( vecS0
, imm
)));
26301 putYMMRegLane128(rD
, 1, mkexpr(math_PSLLDQ( vecS1
, imm
)));
26303 goto decode_success
;
26305 if (gregLO3ofRM(getUChar(delta
)) == 2) {
26306 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26307 "vpsrlq", Iop_ShrN64x4
);
26309 goto decode_success
;
26311 if (gregLO3ofRM(getUChar(delta
)) == 6) {
26312 delta
= dis_AVX256_shiftE_to_V_imm( pfx
, delta
,
26313 "vpsllq", Iop_ShlN64x4
);
26315 goto decode_success
;
26317 /* else fall through */
26322 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
26323 /* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */
26324 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26325 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26326 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqb", Iop_CmpEQ8x16
);
26327 goto decode_success
;
26329 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
26330 /* VPCMPEQB = VEX.NDS.256.66.0F.WIG 74 /r */
26331 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26332 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26333 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqb", Iop_CmpEQ8x32
);
26334 goto decode_success
;
26339 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
26340 /* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */
26341 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26342 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26343 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqw", Iop_CmpEQ16x8
);
26344 goto decode_success
;
26346 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
26347 /* VPCMPEQW = VEX.NDS.256.66.0F.WIG 75 /r */
26348 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26349 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26350 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqw", Iop_CmpEQ16x16
);
26351 goto decode_success
;
26356 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
26357 /* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */
26358 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26359 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26360 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqd", Iop_CmpEQ32x4
);
26361 goto decode_success
;
26363 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
26364 /* VPCMPEQD = VEX.NDS.256.66.0F.WIG 76 /r */
26365 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26366 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26367 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqd", Iop_CmpEQ32x8
);
26368 goto decode_success
;
26373 /* VZEROUPPER = VEX.128.0F.WIG 77 */
26374 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26376 IRTemp zero128
= newTemp(Ity_V128
);
26377 assign(zero128
, mkV128(0));
26378 for (i
= 0; i
< 16; i
++) {
26379 putYMMRegLane128(i
, 1, mkexpr(zero128
));
26381 DIP("vzeroupper\n");
26382 goto decode_success
;
26384 /* VZEROALL = VEX.256.0F.WIG 77 */
26385 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26387 IRTemp zero128
= newTemp(Ity_V128
);
26388 assign(zero128
, mkV128(0));
26389 for (i
= 0; i
< 16; i
++) {
26390 putYMMRegLoAndZU(i
, mkexpr(zero128
));
26393 goto decode_success
;
26399 /* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */
26400 /* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */
26401 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26402 IRTemp sV
= newTemp(Ity_V128
);
26403 IRTemp dV
= newTemp(Ity_V128
);
26404 Bool isAdd
= opc
== 0x7C;
26405 const HChar
* str
= isAdd
? "add" : "sub";
26406 UChar modrm
= getUChar(delta
);
26407 UInt rG
= gregOfRexRM(pfx
,modrm
);
26408 UInt rV
= getVexNvvvv(pfx
);
26409 if (epartIsReg(modrm
)) {
26410 UInt rE
= eregOfRexRM(pfx
,modrm
);
26411 assign( sV
, getXMMReg(rE
) );
26412 DIP("vh%spd %s,%s,%s\n", str
, nameXMMReg(rE
),
26413 nameXMMReg(rV
), nameXMMReg(rG
));
26416 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26417 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
26418 DIP("vh%spd %s,%s,%s\n", str
, dis_buf
,
26419 nameXMMReg(rV
), nameXMMReg(rG
));
26422 assign( dV
, getXMMReg(rV
) );
26423 putYMMRegLoAndZU( rG
, mkexpr( math_HADDPS_128 ( dV
, sV
, isAdd
) ) );
26425 goto decode_success
;
26427 /* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */
26428 /* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */
26429 if (haveF2no66noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26430 IRTemp sV
= newTemp(Ity_V256
);
26431 IRTemp dV
= newTemp(Ity_V256
);
26432 IRTemp s1
, s0
, d1
, d0
;
26433 Bool isAdd
= opc
== 0x7C;
26434 const HChar
* str
= isAdd
? "add" : "sub";
26435 UChar modrm
= getUChar(delta
);
26436 UInt rG
= gregOfRexRM(pfx
,modrm
);
26437 UInt rV
= getVexNvvvv(pfx
);
26438 s1
= s0
= d1
= d0
= IRTemp_INVALID
;
26439 if (epartIsReg(modrm
)) {
26440 UInt rE
= eregOfRexRM(pfx
,modrm
);
26441 assign( sV
, getYMMReg(rE
) );
26442 DIP("vh%spd %s,%s,%s\n", str
, nameYMMReg(rE
),
26443 nameYMMReg(rV
), nameYMMReg(rG
));
26446 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26447 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
26448 DIP("vh%spd %s,%s,%s\n", str
, dis_buf
,
26449 nameYMMReg(rV
), nameYMMReg(rG
));
26452 assign( dV
, getYMMReg(rV
) );
26453 breakupV256toV128s( dV
, &d1
, &d0
);
26454 breakupV256toV128s( sV
, &s1
, &s0
);
26455 putYMMReg( rG
, binop(Iop_V128HLtoV256
,
26456 mkexpr( math_HADDPS_128 ( d1
, s1
, isAdd
) ),
26457 mkexpr( math_HADDPS_128 ( d0
, s0
, isAdd
) ) ) );
26459 goto decode_success
;
26461 /* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */
26462 /* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */
26463 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26464 IRTemp sV
= newTemp(Ity_V128
);
26465 IRTemp dV
= newTemp(Ity_V128
);
26466 Bool isAdd
= opc
== 0x7C;
26467 const HChar
* str
= isAdd
? "add" : "sub";
26468 UChar modrm
= getUChar(delta
);
26469 UInt rG
= gregOfRexRM(pfx
,modrm
);
26470 UInt rV
= getVexNvvvv(pfx
);
26471 if (epartIsReg(modrm
)) {
26472 UInt rE
= eregOfRexRM(pfx
,modrm
);
26473 assign( sV
, getXMMReg(rE
) );
26474 DIP("vh%spd %s,%s,%s\n", str
, nameXMMReg(rE
),
26475 nameXMMReg(rV
), nameXMMReg(rG
));
26478 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26479 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
26480 DIP("vh%spd %s,%s,%s\n", str
, dis_buf
,
26481 nameXMMReg(rV
), nameXMMReg(rG
));
26484 assign( dV
, getXMMReg(rV
) );
26485 putYMMRegLoAndZU( rG
, mkexpr( math_HADDPD_128 ( dV
, sV
, isAdd
) ) );
26487 goto decode_success
;
26489 /* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */
26490 /* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */
26491 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26492 IRTemp sV
= newTemp(Ity_V256
);
26493 IRTemp dV
= newTemp(Ity_V256
);
26494 IRTemp s1
, s0
, d1
, d0
;
26495 Bool isAdd
= opc
== 0x7C;
26496 const HChar
* str
= isAdd
? "add" : "sub";
26497 UChar modrm
= getUChar(delta
);
26498 UInt rG
= gregOfRexRM(pfx
,modrm
);
26499 UInt rV
= getVexNvvvv(pfx
);
26500 s1
= s0
= d1
= d0
= IRTemp_INVALID
;
26501 if (epartIsReg(modrm
)) {
26502 UInt rE
= eregOfRexRM(pfx
,modrm
);
26503 assign( sV
, getYMMReg(rE
) );
26504 DIP("vh%spd %s,%s,%s\n", str
, nameYMMReg(rE
),
26505 nameYMMReg(rV
), nameYMMReg(rG
));
26508 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26509 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
26510 DIP("vh%spd %s,%s,%s\n", str
, dis_buf
,
26511 nameYMMReg(rV
), nameYMMReg(rG
));
26514 assign( dV
, getYMMReg(rV
) );
26515 breakupV256toV128s( dV
, &d1
, &d0
);
26516 breakupV256toV128s( sV
, &s1
, &s0
);
26517 putYMMReg( rG
, binop(Iop_V128HLtoV256
,
26518 mkexpr( math_HADDPD_128 ( d1
, s1
, isAdd
) ),
26519 mkexpr( math_HADDPD_128 ( d0
, s0
, isAdd
) ) ) );
26521 goto decode_success
;
26526 /* Note the Intel docs don't make sense for this. I think they
26527 are wrong. They seem to imply it is a store when in fact I
26528 think it is a load. Also it's unclear whether this is W0, W1
26530 /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */
26531 if (haveF3no66noF2(pfx
)
26532 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
26533 vassert(sz
== 4); /* even tho we are transferring 8, not 4. */
26534 UChar modrm
= getUChar(delta
);
26535 UInt rG
= gregOfRexRM(pfx
,modrm
);
26536 if (epartIsReg(modrm
)) {
26537 UInt rE
= eregOfRexRM(pfx
,modrm
);
26538 putXMMRegLane64( rG
, 0, getXMMRegLane64( rE
, 0 ));
26539 DIP("vmovq %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
26542 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26543 putXMMRegLane64( rG
, 0, loadLE(Ity_I64
, mkexpr(addr
)) );
26544 DIP("vmovq %s,%s\n", dis_buf
, nameXMMReg(rG
));
26547 /* zero bits 255:64 */
26548 putXMMRegLane64( rG
, 1, mkU64(0) );
26549 putYMMRegLane128( rG
, 1, mkV128(0) );
26550 goto decode_success
;
26552 /* VMOVQ xmm1, r64 = VEX.128.66.0F.W1 7E /r (reg case only) */
26553 /* Moves from G to E, so is a store-form insn */
26554 /* Intel docs list this in the VMOVD entry for some reason. */
26555 if (have66noF2noF3(pfx
)
26556 && 0==getVexL(pfx
)/*128*/ && 1==getRexW(pfx
)/*W1*/) {
26557 UChar modrm
= getUChar(delta
);
26558 UInt rG
= gregOfRexRM(pfx
,modrm
);
26559 if (epartIsReg(modrm
)) {
26560 UInt rE
= eregOfRexRM(pfx
,modrm
);
26561 DIP("vmovq %s,%s\n", nameXMMReg(rG
), nameIReg64(rE
));
26562 putIReg64(rE
, getXMMRegLane64(rG
, 0));
26565 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26566 storeLE( mkexpr(addr
), getXMMRegLane64(rG
, 0) );
26567 DIP("vmovq %s,%s\n", dis_buf
, nameXMMReg(rG
));
26570 goto decode_success
;
26572 /* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r (reg case only) */
26573 /* Moves from G to E, so is a store-form insn */
26574 if (have66noF2noF3(pfx
)
26575 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
26576 UChar modrm
= getUChar(delta
);
26577 UInt rG
= gregOfRexRM(pfx
,modrm
);
26578 if (epartIsReg(modrm
)) {
26579 UInt rE
= eregOfRexRM(pfx
,modrm
);
26580 DIP("vmovd %s,%s\n", nameXMMReg(rG
), nameIReg32(rE
));
26581 putIReg32(rE
, getXMMRegLane32(rG
, 0));
26584 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26585 storeLE( mkexpr(addr
), getXMMRegLane32(rG
, 0) );
26586 DIP("vmovd %s,%s\n", dis_buf
, nameXMMReg(rG
));
26589 goto decode_success
;
26594 /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */
26595 /* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */
26596 if ((have66noF2noF3(pfx
) || haveF3no66noF2(pfx
))
26597 && 1==getVexL(pfx
)/*256*/) {
26598 UChar modrm
= getUChar(delta
);
26599 UInt rS
= gregOfRexRM(pfx
, modrm
);
26600 IRTemp tS
= newTemp(Ity_V256
);
26601 Bool isA
= have66noF2noF3(pfx
);
26602 HChar ch
= isA
? 'a' : 'u';
26603 assign(tS
, getYMMReg(rS
));
26604 if (epartIsReg(modrm
)) {
26605 UInt rD
= eregOfRexRM(pfx
, modrm
);
26607 putYMMReg(rD
, mkexpr(tS
));
26608 DIP("vmovdq%c %s,%s\n", ch
, nameYMMReg(rS
), nameYMMReg(rD
));
26610 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26613 gen_SIGNAL_if_not_32_aligned(vbi
, addr
);
26614 storeLE(mkexpr(addr
), mkexpr(tS
));
26615 DIP("vmovdq%c %s,%s\n", ch
, nameYMMReg(rS
), dis_buf
);
26617 goto decode_success
;
26619 /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */
26620 /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */
26621 if ((have66noF2noF3(pfx
) || haveF3no66noF2(pfx
))
26622 && 0==getVexL(pfx
)/*128*/) {
26623 UChar modrm
= getUChar(delta
);
26624 UInt rS
= gregOfRexRM(pfx
, modrm
);
26625 IRTemp tS
= newTemp(Ity_V128
);
26626 Bool isA
= have66noF2noF3(pfx
);
26627 HChar ch
= isA
? 'a' : 'u';
26628 assign(tS
, getXMMReg(rS
));
26629 if (epartIsReg(modrm
)) {
26630 UInt rD
= eregOfRexRM(pfx
, modrm
);
26632 putYMMRegLoAndZU(rD
, mkexpr(tS
));
26633 DIP("vmovdq%c %s,%s\n", ch
, nameXMMReg(rS
), nameXMMReg(rD
));
26635 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
26638 gen_SIGNAL_if_not_16_aligned(vbi
, addr
);
26639 storeLE(mkexpr(addr
), mkexpr(tS
));
26640 DIP("vmovdq%c %s,%s\n", ch
, nameXMMReg(rS
), dis_buf
);
26642 goto decode_success
;
26647 /* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */
26648 if (haveNo66noF2noF3(pfx
)
26649 && 0==getVexL(pfx
)/*LZ*/
26650 && 0==getRexW(pfx
) /* be paranoid -- Intel docs don't require this */
26651 && !epartIsReg(getUChar(delta
)) && gregLO3ofRM(getUChar(delta
)) == 3
26653 delta
= dis_STMXCSR(vbi
, pfx
, delta
, True
/*isAvx*/);
26654 goto decode_success
;
26656 /* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */
26657 if (haveNo66noF2noF3(pfx
)
26658 && 0==getVexL(pfx
)/*LZ*/
26659 && 0==getRexW(pfx
) /* be paranoid -- Intel docs don't require this */
26660 && !epartIsReg(getUChar(delta
)) && gregLO3ofRM(getUChar(delta
)) == 2
26662 delta
= dis_LDMXCSR(vbi
, pfx
, delta
, True
/*isAvx*/);
26663 goto decode_success
;
26668 /* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */
26669 /* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */
26670 if (haveF2no66noF3(pfx
)) {
26671 Long delta0
= delta
;
26672 delta
= dis_AVX128_cmp_V_E_to_G( uses_vvvv
, vbi
, pfx
, delta
,
26673 "vcmpsd", False
/*!all_lanes*/,
26675 if (delta
> delta0
) goto decode_success
;
26676 /* else fall through -- decoding has failed */
26678 /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */
26679 /* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */
26680 if (haveF3no66noF2(pfx
)) {
26681 Long delta0
= delta
;
26682 delta
= dis_AVX128_cmp_V_E_to_G( uses_vvvv
, vbi
, pfx
, delta
,
26683 "vcmpss", False
/*!all_lanes*/,
26685 if (delta
> delta0
) goto decode_success
;
26686 /* else fall through -- decoding has failed */
26688 /* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
26689 /* = VEX.NDS.128.66.0F.WIG C2 /r ib */
26690 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26691 Long delta0
= delta
;
26692 delta
= dis_AVX128_cmp_V_E_to_G( uses_vvvv
, vbi
, pfx
, delta
,
26693 "vcmppd", True
/*all_lanes*/,
26695 if (delta
> delta0
) goto decode_success
;
26696 /* else fall through -- decoding has failed */
26698 /* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
26699 /* = VEX.NDS.256.66.0F.WIG C2 /r ib */
26700 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26701 Long delta0
= delta
;
26702 delta
= dis_AVX256_cmp_V_E_to_G( uses_vvvv
, vbi
, pfx
, delta
,
26703 "vcmppd", 8/*sz*/);
26704 if (delta
> delta0
) goto decode_success
;
26705 /* else fall through -- decoding has failed */
26707 /* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
26708 /* = VEX.NDS.128.0F.WIG C2 /r ib */
26709 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26710 Long delta0
= delta
;
26711 delta
= dis_AVX128_cmp_V_E_to_G( uses_vvvv
, vbi
, pfx
, delta
,
26712 "vcmpps", True
/*all_lanes*/,
26714 if (delta
> delta0
) goto decode_success
;
26715 /* else fall through -- decoding has failed */
26717 /* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
26718 /* = VEX.NDS.256.0F.WIG C2 /r ib */
26719 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26720 Long delta0
= delta
;
26721 delta
= dis_AVX256_cmp_V_E_to_G( uses_vvvv
, vbi
, pfx
, delta
,
26722 "vcmpps", 4/*sz*/);
26723 if (delta
> delta0
) goto decode_success
;
26724 /* else fall through -- decoding has failed */
26729 /* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */
26730 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26731 UChar modrm
= getUChar(delta
);
26732 UInt rG
= gregOfRexRM(pfx
, modrm
);
26733 UInt rV
= getVexNvvvv(pfx
);
26735 IRTemp new16
= newTemp(Ity_I16
);
26737 if ( epartIsReg( modrm
) ) {
26738 imm8
= (Int
)(getUChar(delta
+1) & 7);
26739 assign( new16
, unop(Iop_32to16
,
26740 getIReg32(eregOfRexRM(pfx
,modrm
))) );
26742 DIP( "vpinsrw $%d,%s,%s\n", imm8
,
26743 nameIReg32( eregOfRexRM(pfx
, modrm
) ), nameXMMReg(rG
) );
26745 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
26746 imm8
= (Int
)(getUChar(delta
+alen
) & 7);
26747 assign( new16
, loadLE( Ity_I16
, mkexpr(addr
) ));
26749 DIP( "vpinsrw $%d,%s,%s\n",
26750 imm8
, dis_buf
, nameXMMReg(rG
) );
26753 IRTemp src_vec
= newTemp(Ity_V128
);
26754 assign(src_vec
, getXMMReg( rV
));
26755 IRTemp res_vec
= math_PINSRW_128( src_vec
, new16
, imm8
);
26756 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
26758 goto decode_success
;
26763 /* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */
26764 if (have66noF2noF3(pfx
)
26765 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
26766 Long delta0
= delta
;
26767 delta
= dis_PEXTRW_128_EregOnly_toG( vbi
, pfx
, delta
,
26769 if (delta
> delta0
) goto decode_success
;
26770 /* else fall through -- decoding has failed */
26775 /* VSHUFPS imm8, xmm3/m128, xmm2, xmm1, xmm2 */
26776 /* = VEX.NDS.128.0F.WIG C6 /r ib */
26777 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26779 IRTemp eV
= newTemp(Ity_V128
);
26780 IRTemp vV
= newTemp(Ity_V128
);
26781 UInt modrm
= getUChar(delta
);
26782 UInt rG
= gregOfRexRM(pfx
,modrm
);
26783 UInt rV
= getVexNvvvv(pfx
);
26784 assign( vV
, getXMMReg(rV
) );
26785 if (epartIsReg(modrm
)) {
26786 UInt rE
= eregOfRexRM(pfx
,modrm
);
26787 assign( eV
, getXMMReg(rE
) );
26788 imm8
= (Int
)getUChar(delta
+1);
26790 DIP("vshufps $%d,%s,%s,%s\n",
26791 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
26793 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
26794 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
26795 imm8
= (Int
)getUChar(delta
+alen
);
26797 DIP("vshufps $%d,%s,%s,%s\n",
26798 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
26800 IRTemp res
= math_SHUFPS_128( eV
, vV
, imm8
);
26801 putYMMRegLoAndZU( rG
, mkexpr(res
) );
26803 goto decode_success
;
26805 /* VSHUFPS imm8, ymm3/m256, ymm2, ymm1, ymm2 */
26806 /* = VEX.NDS.256.0F.WIG C6 /r ib */
26807 if (haveNo66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26809 IRTemp eV
= newTemp(Ity_V256
);
26810 IRTemp vV
= newTemp(Ity_V256
);
26811 UInt modrm
= getUChar(delta
);
26812 UInt rG
= gregOfRexRM(pfx
,modrm
);
26813 UInt rV
= getVexNvvvv(pfx
);
26814 assign( vV
, getYMMReg(rV
) );
26815 if (epartIsReg(modrm
)) {
26816 UInt rE
= eregOfRexRM(pfx
,modrm
);
26817 assign( eV
, getYMMReg(rE
) );
26818 imm8
= (Int
)getUChar(delta
+1);
26820 DIP("vshufps $%d,%s,%s,%s\n",
26821 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
26823 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
26824 assign( eV
, loadLE(Ity_V256
, mkexpr(addr
)) );
26825 imm8
= (Int
)getUChar(delta
+alen
);
26827 DIP("vshufps $%d,%s,%s,%s\n",
26828 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
26830 IRTemp res
= math_SHUFPS_256( eV
, vV
, imm8
);
26831 putYMMReg( rG
, mkexpr(res
) );
26833 goto decode_success
;
26835 /* VSHUFPD imm8, xmm3/m128, xmm2, xmm1, xmm2 */
26836 /* = VEX.NDS.128.66.0F.WIG C6 /r ib */
26837 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26839 IRTemp eV
= newTemp(Ity_V128
);
26840 IRTemp vV
= newTemp(Ity_V128
);
26841 UInt modrm
= getUChar(delta
);
26842 UInt rG
= gregOfRexRM(pfx
,modrm
);
26843 UInt rV
= getVexNvvvv(pfx
);
26844 assign( vV
, getXMMReg(rV
) );
26845 if (epartIsReg(modrm
)) {
26846 UInt rE
= eregOfRexRM(pfx
,modrm
);
26847 assign( eV
, getXMMReg(rE
) );
26848 imm8
= (Int
)getUChar(delta
+1);
26850 DIP("vshufpd $%d,%s,%s,%s\n",
26851 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
26853 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
26854 assign( eV
, loadLE(Ity_V128
, mkexpr(addr
)) );
26855 imm8
= (Int
)getUChar(delta
+alen
);
26857 DIP("vshufpd $%d,%s,%s,%s\n",
26858 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
26860 IRTemp res
= math_SHUFPD_128( eV
, vV
, imm8
);
26861 putYMMRegLoAndZU( rG
, mkexpr(res
) );
26863 goto decode_success
;
26865 /* VSHUFPD imm8, ymm3/m256, ymm2, ymm1, ymm2 */
26866 /* = VEX.NDS.256.66.0F.WIG C6 /r ib */
26867 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26869 IRTemp eV
= newTemp(Ity_V256
);
26870 IRTemp vV
= newTemp(Ity_V256
);
26871 UInt modrm
= getUChar(delta
);
26872 UInt rG
= gregOfRexRM(pfx
,modrm
);
26873 UInt rV
= getVexNvvvv(pfx
);
26874 assign( vV
, getYMMReg(rV
) );
26875 if (epartIsReg(modrm
)) {
26876 UInt rE
= eregOfRexRM(pfx
,modrm
);
26877 assign( eV
, getYMMReg(rE
) );
26878 imm8
= (Int
)getUChar(delta
+1);
26880 DIP("vshufpd $%d,%s,%s,%s\n",
26881 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
26883 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
26884 assign( eV
, loadLE(Ity_V256
, mkexpr(addr
)) );
26885 imm8
= (Int
)getUChar(delta
+alen
);
26887 DIP("vshufpd $%d,%s,%s,%s\n",
26888 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
26890 IRTemp res
= math_SHUFPD_256( eV
, vV
, imm8
);
26891 putYMMReg( rG
, mkexpr(res
) );
26893 goto decode_success
;
26898 /* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */
26899 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26900 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26901 uses_vvvv
, vbi
, pfx
, delta
,
26902 "vaddsubpd", math_ADDSUBPD_128
);
26903 goto decode_success
;
26905 /* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */
26906 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26907 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26908 uses_vvvv
, vbi
, pfx
, delta
,
26909 "vaddsubpd", math_ADDSUBPD_256
);
26910 goto decode_success
;
26912 /* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */
26913 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26914 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26915 uses_vvvv
, vbi
, pfx
, delta
,
26916 "vaddsubps", math_ADDSUBPS_128
);
26917 goto decode_success
;
26919 /* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */
26920 if (haveF2no66noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26921 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26922 uses_vvvv
, vbi
, pfx
, delta
,
26923 "vaddsubps", math_ADDSUBPS_256
);
26924 goto decode_success
;
26929 /* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */
26930 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26931 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
26932 "vpsrlw", Iop_ShrN16x8
);
26934 goto decode_success
;
26937 /* VPSRLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D1 /r */
26938 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26939 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
26940 "vpsrlw", Iop_ShrN16x16
);
26942 goto decode_success
;
26948 /* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */
26949 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26950 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
26951 "vpsrld", Iop_ShrN32x4
);
26953 goto decode_success
;
26955 /* VPSRLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D2 /r */
26956 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26957 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
26958 "vpsrld", Iop_ShrN32x8
);
26960 goto decode_success
;
26965 /* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */
26966 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26967 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
26968 "vpsrlq", Iop_ShrN64x2
);
26970 goto decode_success
;
26972 /* VPSRLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D3 /r */
26973 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26974 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
26975 "vpsrlq", Iop_ShrN64x4
);
26977 goto decode_success
;
26982 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26983 /* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */
26984 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
26985 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26986 uses_vvvv
, vbi
, pfx
, delta
, "vpaddq", Iop_Add64x2
);
26987 goto decode_success
;
26989 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26990 /* VPADDQ = VEX.NDS.256.66.0F.WIG D4 /r */
26991 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
26992 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26993 uses_vvvv
, vbi
, pfx
, delta
, "vpaddq", Iop_Add64x4
);
26994 goto decode_success
;
26999 /* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */
27000 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27001 delta
= dis_AVX128_E_V_to_G(
27002 uses_vvvv
, vbi
, pfx
, delta
, "vpmullw", Iop_Mul16x8
);
27003 goto decode_success
;
27005 /* VPMULLW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D5 /r */
27006 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27007 delta
= dis_AVX256_E_V_to_G(
27008 uses_vvvv
, vbi
, pfx
, delta
, "vpmullw", Iop_Mul16x16
);
27009 goto decode_success
;
27014 /* I can't even find any Intel docs for this one. */
27015 /* Basically: 66 0F D6 = MOVQ -- move 64 bits from G (lo half
27016 xmm) to E (mem or lo half xmm). Looks like L==0(128), W==0
27018 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
27019 && 0==getRexW(pfx
)/*this might be redundant, dunno*/) {
27020 UChar modrm
= getUChar(delta
);
27021 UInt rG
= gregOfRexRM(pfx
,modrm
);
27022 if (epartIsReg(modrm
)) {
27023 /* fall through, awaiting test case */
27024 /* dst: lo half copied, hi half zeroed */
27026 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
27027 storeLE( mkexpr(addr
), getXMMRegLane64( rG
, 0 ));
27028 DIP("vmovq %s,%s\n", nameXMMReg(rG
), dis_buf
);
27030 goto decode_success
;
27036 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */
27037 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27038 delta
= dis_PMOVMSKB_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
27039 goto decode_success
;
27041 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB ymm1, r32 */
27042 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27043 delta
= dis_PMOVMSKB_256( vbi
, pfx
, delta
);
27044 goto decode_success
;
27049 /* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */
27050 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27051 delta
= dis_AVX128_E_V_to_G(
27052 uses_vvvv
, vbi
, pfx
, delta
, "vpsubusb", Iop_QSub8Ux16
);
27053 goto decode_success
;
27055 /* VPSUBUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D8 /r */
27056 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27057 delta
= dis_AVX256_E_V_to_G(
27058 uses_vvvv
, vbi
, pfx
, delta
, "vpsubusb", Iop_QSub8Ux32
);
27059 goto decode_success
;
27064 /* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */
27065 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27066 delta
= dis_AVX128_E_V_to_G(
27067 uses_vvvv
, vbi
, pfx
, delta
, "vpsubusw", Iop_QSub16Ux8
);
27068 goto decode_success
;
27070 /* VPSUBUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D9 /r */
27071 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27072 delta
= dis_AVX256_E_V_to_G(
27073 uses_vvvv
, vbi
, pfx
, delta
, "vpsubusw", Iop_QSub16Ux16
);
27074 goto decode_success
;
27079 /* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */
27080 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27081 delta
= dis_AVX128_E_V_to_G(
27082 uses_vvvv
, vbi
, pfx
, delta
, "vpminub", Iop_Min8Ux16
);
27083 goto decode_success
;
27085 /* VPMINUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DA /r */
27086 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27087 delta
= dis_AVX256_E_V_to_G(
27088 uses_vvvv
, vbi
, pfx
, delta
, "vpminub", Iop_Min8Ux32
);
27089 goto decode_success
;
27094 /* VPAND r/m, rV, r ::: r = rV & r/m */
27095 /* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */
27096 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27097 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27098 uses_vvvv
, vbi
, pfx
, delta
, "vpand", Iop_AndV128
);
27099 goto decode_success
;
27101 /* VPAND r/m, rV, r ::: r = rV & r/m */
27102 /* VEX.NDS.256.66.0F.WIG DB /r = VPAND ymm3/m256, ymm2, ymm1 */
27103 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27104 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27105 uses_vvvv
, vbi
, pfx
, delta
, "vpand", Iop_AndV256
);
27106 goto decode_success
;
27111 /* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DC /r */
27112 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27113 delta
= dis_AVX128_E_V_to_G(
27114 uses_vvvv
, vbi
, pfx
, delta
, "vpaddusb", Iop_QAdd8Ux16
);
27115 goto decode_success
;
27117 /* VPADDUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DC /r */
27118 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27119 delta
= dis_AVX256_E_V_to_G(
27120 uses_vvvv
, vbi
, pfx
, delta
, "vpaddusb", Iop_QAdd8Ux32
);
27121 goto decode_success
;
27126 /* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */
27127 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27128 delta
= dis_AVX128_E_V_to_G(
27129 uses_vvvv
, vbi
, pfx
, delta
, "vpaddusw", Iop_QAdd16Ux8
);
27130 goto decode_success
;
27132 /* VPADDUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DD /r */
27133 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27134 delta
= dis_AVX256_E_V_to_G(
27135 uses_vvvv
, vbi
, pfx
, delta
, "vpaddusw", Iop_QAdd16Ux16
);
27136 goto decode_success
;
27141 /* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */
27142 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27143 delta
= dis_AVX128_E_V_to_G(
27144 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxub", Iop_Max8Ux16
);
27145 goto decode_success
;
27147 /* VPMAXUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DE /r */
27148 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27149 delta
= dis_AVX256_E_V_to_G(
27150 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxub", Iop_Max8Ux32
);
27151 goto decode_success
;
27156 /* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) */
27157 /* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */
27158 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27159 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
27160 uses_vvvv
, vbi
, pfx
, delta
, "vpandn", Iop_AndV128
,
27161 NULL
, True
/*invertLeftArg*/, False
/*swapArgs*/ );
27162 goto decode_success
;
27164 /* VPANDN r/m, rV, r ::: r = rV & ~r/m (is that correct, re the ~ ?) */
27165 /* VEX.NDS.256.66.0F.WIG DF /r = VPANDN ymm3/m256, ymm2, ymm1 */
27166 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27167 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
27168 uses_vvvv
, vbi
, pfx
, delta
, "vpandn", Iop_AndV256
,
27169 NULL
, True
/*invertLeftArg*/, False
/*swapArgs*/ );
27170 goto decode_success
;
27175 /* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */
27176 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27177 delta
= dis_AVX128_E_V_to_G(
27178 uses_vvvv
, vbi
, pfx
, delta
, "vpavgb", Iop_Avg8Ux16
);
27179 goto decode_success
;
27181 /* VPAVGB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E0 /r */
27182 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27183 delta
= dis_AVX256_E_V_to_G(
27184 uses_vvvv
, vbi
, pfx
, delta
, "vpavgb", Iop_Avg8Ux32
);
27185 goto decode_success
;
27190 /* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */
27191 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27192 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
27193 "vpsraw", Iop_SarN16x8
);
27195 goto decode_success
;
27197 /* VPSRAW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E1 /r */
27198 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27199 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
27200 "vpsraw", Iop_SarN16x16
);
27202 goto decode_success
;
27207 /* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */
27208 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27209 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
27210 "vpsrad", Iop_SarN32x4
);
27212 goto decode_success
;
27214 /* VPSRAD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E2 /r */
27215 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27216 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
27217 "vpsrad", Iop_SarN32x8
);
27219 goto decode_success
;
27224 /* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */
27225 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27226 delta
= dis_AVX128_E_V_to_G(
27227 uses_vvvv
, vbi
, pfx
, delta
, "vpavgw", Iop_Avg16Ux8
);
27228 goto decode_success
;
27230 /* VPAVGW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E3 /r */
27231 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27232 delta
= dis_AVX256_E_V_to_G(
27233 uses_vvvv
, vbi
, pfx
, delta
, "vpavgw", Iop_Avg16Ux16
);
27234 goto decode_success
;
27239 /* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */
27240 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27241 delta
= dis_AVX128_E_V_to_G(
27242 uses_vvvv
, vbi
, pfx
, delta
, "vpmulhuw", Iop_MulHi16Ux8
);
27243 goto decode_success
;
27245 /* VPMULHUW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E4 /r */
27246 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27247 delta
= dis_AVX256_E_V_to_G(
27248 uses_vvvv
, vbi
, pfx
, delta
, "vpmulhuw", Iop_MulHi16Ux16
);
27249 goto decode_success
;
27254 /* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */
27255 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27256 delta
= dis_AVX128_E_V_to_G(
27257 uses_vvvv
, vbi
, pfx
, delta
, "vpmulhw", Iop_MulHi16Sx8
);
27258 goto decode_success
;
27260 /* VPMULHW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E5 /r */
27261 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27262 delta
= dis_AVX256_E_V_to_G(
27263 uses_vvvv
, vbi
, pfx
, delta
, "vpmulhw", Iop_MulHi16Sx16
);
27264 goto decode_success
;
27269 /* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */
27270 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*128*/) {
27271 delta
= dis_CVTDQ2PD_128(vbi
, pfx
, delta
, True
/*isAvx*/);
27272 goto decode_success
;
27274 /* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */
27275 if (haveF3no66noF2(pfx
) && 1==getVexL(pfx
)/*256*/) {
27276 delta
= dis_CVTDQ2PD_256(vbi
, pfx
, delta
);
27277 goto decode_success
;
27279 /* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */
27280 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27281 delta
= dis_CVTxPD2DQ_128(vbi
, pfx
, delta
, True
/*isAvx*/,
27283 goto decode_success
;
27285 /* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */
27286 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27287 delta
= dis_CVTxPD2DQ_256(vbi
, pfx
, delta
, True
/*r2zero*/);
27288 goto decode_success
;
27290 /* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */
27291 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27292 delta
= dis_CVTxPD2DQ_128(vbi
, pfx
, delta
, True
/*isAvx*/,
27294 goto decode_success
;
27296 /* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */
27297 if (haveF2no66noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27298 delta
= dis_CVTxPD2DQ_256(vbi
, pfx
, delta
, False
/*!r2zero*/);
27299 goto decode_success
;
27304 /* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */
27305 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27306 UChar modrm
= getUChar(delta
);
27307 UInt rG
= gregOfRexRM(pfx
,modrm
);
27308 if (!epartIsReg(modrm
)) {
27309 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
27310 gen_SIGNAL_if_not_16_aligned( vbi
, addr
);
27311 storeLE( mkexpr(addr
), getXMMReg(rG
) );
27312 DIP("vmovntdq %s,%s\n", dis_buf
, nameXMMReg(rG
));
27314 goto decode_success
;
27316 /* else fall through */
27318 /* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */
27319 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27320 UChar modrm
= getUChar(delta
);
27321 UInt rG
= gregOfRexRM(pfx
,modrm
);
27322 if (!epartIsReg(modrm
)) {
27323 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
27324 gen_SIGNAL_if_not_32_aligned( vbi
, addr
);
27325 storeLE( mkexpr(addr
), getYMMReg(rG
) );
27326 DIP("vmovntdq %s,%s\n", dis_buf
, nameYMMReg(rG
));
27328 goto decode_success
;
27330 /* else fall through */
27335 /* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */
27336 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27337 delta
= dis_AVX128_E_V_to_G(
27338 uses_vvvv
, vbi
, pfx
, delta
, "vpsubsb", Iop_QSub8Sx16
);
27339 goto decode_success
;
27341 /* VPSUBSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E8 /r */
27342 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27343 delta
= dis_AVX256_E_V_to_G(
27344 uses_vvvv
, vbi
, pfx
, delta
, "vpsubsb", Iop_QSub8Sx32
);
27345 goto decode_success
;
27350 /* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */
27351 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27352 delta
= dis_AVX128_E_V_to_G(
27353 uses_vvvv
, vbi
, pfx
, delta
, "vpsubsw", Iop_QSub16Sx8
);
27354 goto decode_success
;
27356 /* VPSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E9 /r */
27357 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27358 delta
= dis_AVX256_E_V_to_G(
27359 uses_vvvv
, vbi
, pfx
, delta
, "vpsubsw", Iop_QSub16Sx16
);
27360 goto decode_success
;
27365 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
27366 /* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */
27367 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27368 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27369 uses_vvvv
, vbi
, pfx
, delta
, "vpminsw", Iop_Min16Sx8
);
27370 goto decode_success
;
27372 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
27373 /* VPMINSW = VEX.NDS.256.66.0F.WIG EA /r */
27374 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27375 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27376 uses_vvvv
, vbi
, pfx
, delta
, "vpminsw", Iop_Min16Sx16
);
27377 goto decode_success
;
27382 /* VPOR r/m, rV, r ::: r = rV | r/m */
27383 /* VPOR = VEX.NDS.128.66.0F.WIG EB /r */
27384 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27385 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27386 uses_vvvv
, vbi
, pfx
, delta
, "vpor", Iop_OrV128
);
27387 goto decode_success
;
27389 /* VPOR r/m, rV, r ::: r = rV | r/m */
27390 /* VPOR = VEX.NDS.256.66.0F.WIG EB /r */
27391 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27392 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27393 uses_vvvv
, vbi
, pfx
, delta
, "vpor", Iop_OrV256
);
27394 goto decode_success
;
27399 /* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */
27400 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27401 delta
= dis_AVX128_E_V_to_G(
27402 uses_vvvv
, vbi
, pfx
, delta
, "vpaddsb", Iop_QAdd8Sx16
);
27403 goto decode_success
;
27405 /* VPADDSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG EC /r */
27406 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27407 delta
= dis_AVX256_E_V_to_G(
27408 uses_vvvv
, vbi
, pfx
, delta
, "vpaddsb", Iop_QAdd8Sx32
);
27409 goto decode_success
;
27414 /* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */
27415 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27416 delta
= dis_AVX128_E_V_to_G(
27417 uses_vvvv
, vbi
, pfx
, delta
, "vpaddsw", Iop_QAdd16Sx8
);
27418 goto decode_success
;
27420 /* VPADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG ED /r */
27421 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27422 delta
= dis_AVX256_E_V_to_G(
27423 uses_vvvv
, vbi
, pfx
, delta
, "vpaddsw", Iop_QAdd16Sx16
);
27424 goto decode_success
;
27429 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
27430 /* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */
27431 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27432 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27433 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsw", Iop_Max16Sx8
);
27434 goto decode_success
;
27436 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
27437 /* VPMAXSW = VEX.NDS.256.66.0F.WIG EE /r */
27438 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27439 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27440 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsw", Iop_Max16Sx16
);
27441 goto decode_success
;
27446 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
27447 /* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */
27448 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27449 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27450 uses_vvvv
, vbi
, pfx
, delta
, "vpxor", Iop_XorV128
);
27451 goto decode_success
;
27453 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
27454 /* VPXOR = VEX.NDS.256.66.0F.WIG EF /r */
27455 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27456 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27457 uses_vvvv
, vbi
, pfx
, delta
, "vpxor", Iop_XorV256
);
27458 goto decode_success
;
27463 /* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */
27464 if (haveF2no66noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27465 UChar modrm
= getUChar(delta
);
27466 UInt rD
= gregOfRexRM(pfx
, modrm
);
27467 IRTemp tD
= newTemp(Ity_V256
);
27468 if (epartIsReg(modrm
)) break;
27469 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
27471 assign(tD
, loadLE(Ity_V256
, mkexpr(addr
)));
27472 DIP("vlddqu %s,%s\n", dis_buf
, nameYMMReg(rD
));
27473 putYMMReg(rD
, mkexpr(tD
));
27474 goto decode_success
;
27476 /* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */
27477 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27478 UChar modrm
= getUChar(delta
);
27479 UInt rD
= gregOfRexRM(pfx
, modrm
);
27480 IRTemp tD
= newTemp(Ity_V128
);
27481 if (epartIsReg(modrm
)) break;
27482 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
27484 assign(tD
, loadLE(Ity_V128
, mkexpr(addr
)));
27485 DIP("vlddqu %s,%s\n", dis_buf
, nameXMMReg(rD
));
27486 putYMMRegLoAndZU(rD
, mkexpr(tD
));
27487 goto decode_success
;
27492 /* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */
27493 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27494 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
27495 "vpsllw", Iop_ShlN16x8
);
27497 goto decode_success
;
27500 /* VPSLLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F1 /r */
27501 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27502 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
27503 "vpsllw", Iop_ShlN16x16
);
27505 goto decode_success
;
27511 /* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */
27512 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27513 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
27514 "vpslld", Iop_ShlN32x4
);
27516 goto decode_success
;
27518 /* VPSLLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F2 /r */
27519 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27520 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
27521 "vpslld", Iop_ShlN32x8
);
27523 goto decode_success
;
27528 /* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */
27529 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27530 delta
= dis_AVX128_shiftV_byE( vbi
, pfx
, delta
,
27531 "vpsllq", Iop_ShlN64x2
);
27533 goto decode_success
;
27535 /* VPSLLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F3 /r */
27536 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27537 delta
= dis_AVX256_shiftV_byE( vbi
, pfx
, delta
,
27538 "vpsllq", Iop_ShlN64x4
);
27540 goto decode_success
;
27545 /* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */
27546 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27547 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27548 uses_vvvv
, vbi
, pfx
, delta
,
27549 "vpmuludq", math_PMULUDQ_128
);
27550 goto decode_success
;
27552 /* VPMULUDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F4 /r */
27553 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27554 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27555 uses_vvvv
, vbi
, pfx
, delta
,
27556 "vpmuludq", math_PMULUDQ_256
);
27557 goto decode_success
;
27562 /* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */
27563 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27564 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27565 uses_vvvv
, vbi
, pfx
, delta
,
27566 "vpmaddwd", math_PMADDWD_128
);
27567 goto decode_success
;
27569 /* VPMADDWD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F5 /r */
27570 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27571 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27572 uses_vvvv
, vbi
, pfx
, delta
,
27573 "vpmaddwd", math_PMADDWD_256
);
27574 goto decode_success
;
27579 /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */
27580 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27581 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27582 uses_vvvv
, vbi
, pfx
, delta
,
27583 "vpsadbw", math_PSADBW_128
);
27584 goto decode_success
;
27586 /* VPSADBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F6 /r */
27587 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27588 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27589 uses_vvvv
, vbi
, pfx
, delta
,
27590 "vpsadbw", math_PSADBW_256
);
27591 goto decode_success
;
27596 /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 /r */
27597 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
27598 && epartIsReg(getUChar(delta
))) {
27599 delta
= dis_MASKMOVDQU( vbi
, pfx
, delta
, True
/*isAvx*/ );
27600 goto decode_success
;
27605 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27606 /* VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */
27607 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27608 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27609 uses_vvvv
, vbi
, pfx
, delta
, "vpsubb", Iop_Sub8x16
);
27610 goto decode_success
;
27612 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27613 /* VPSUBB = VEX.NDS.256.66.0F.WIG F8 /r */
27614 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27615 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27616 uses_vvvv
, vbi
, pfx
, delta
, "vpsubb", Iop_Sub8x32
);
27617 goto decode_success
;
27622 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27623 /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */
27624 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27625 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27626 uses_vvvv
, vbi
, pfx
, delta
, "vpsubw", Iop_Sub16x8
);
27627 goto decode_success
;
27629 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27630 /* VPSUBW = VEX.NDS.256.66.0F.WIG F9 /r */
27631 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27632 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27633 uses_vvvv
, vbi
, pfx
, delta
, "vpsubw", Iop_Sub16x16
);
27634 goto decode_success
;
27639 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27640 /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */
27641 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27642 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27643 uses_vvvv
, vbi
, pfx
, delta
, "vpsubd", Iop_Sub32x4
);
27644 goto decode_success
;
27646 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27647 /* VPSUBD = VEX.NDS.256.66.0F.WIG FA /r */
27648 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27649 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27650 uses_vvvv
, vbi
, pfx
, delta
, "vpsubd", Iop_Sub32x8
);
27651 goto decode_success
;
27656 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27657 /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */
27658 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27659 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27660 uses_vvvv
, vbi
, pfx
, delta
, "vpsubq", Iop_Sub64x2
);
27661 goto decode_success
;
27663 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27664 /* VPSUBQ = VEX.NDS.256.66.0F.WIG FB /r */
27665 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27666 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27667 uses_vvvv
, vbi
, pfx
, delta
, "vpsubq", Iop_Sub64x4
);
27668 goto decode_success
;
27673 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27674 /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */
27675 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27676 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27677 uses_vvvv
, vbi
, pfx
, delta
, "vpaddb", Iop_Add8x16
);
27678 goto decode_success
;
27680 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27681 /* VPADDB = VEX.NDS.256.66.0F.WIG FC /r */
27682 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27683 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27684 uses_vvvv
, vbi
, pfx
, delta
, "vpaddb", Iop_Add8x32
);
27685 goto decode_success
;
27690 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27691 /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */
27692 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27693 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27694 uses_vvvv
, vbi
, pfx
, delta
, "vpaddw", Iop_Add16x8
);
27695 goto decode_success
;
27697 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27698 /* VPADDW = VEX.NDS.256.66.0F.WIG FD /r */
27699 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27700 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27701 uses_vvvv
, vbi
, pfx
, delta
, "vpaddw", Iop_Add16x16
);
27702 goto decode_success
;
27707 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27708 /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */
27709 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
27710 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27711 uses_vvvv
, vbi
, pfx
, delta
, "vpaddd", Iop_Add32x4
);
27712 goto decode_success
;
27714 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27715 /* VPADDD = VEX.NDS.256.66.0F.WIG FE /r */
27716 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
27717 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27718 uses_vvvv
, vbi
, pfx
, delta
, "vpaddd", Iop_Add32x8
);
27719 goto decode_success
;
27736 /*------------------------------------------------------------*/
27738 /*--- Top-level post-escape decoders: dis_ESC_0F38__VEX ---*/
27740 /*------------------------------------------------------------*/
27742 static IRTemp
math_PERMILPS_VAR_128 ( IRTemp dataV
, IRTemp ctrlV
)
27744 /* In the control vector, zero out all but the bottom two bits of
27745 each 32-bit lane. */
27746 IRExpr
* cv1
= binop(Iop_ShrN32x4
,
27747 binop(Iop_ShlN32x4
, mkexpr(ctrlV
), mkU8(30)),
27749 /* And use the resulting cleaned-up control vector as steering
27750 in a Perm operation. */
27751 IRTemp res
= newTemp(Ity_V128
);
27752 assign(res
, binop(Iop_Perm32x4
, mkexpr(dataV
), cv1
));
27756 static IRTemp
math_PERMILPS_VAR_256 ( IRTemp dataV
, IRTemp ctrlV
)
27758 IRTemp dHi
, dLo
, cHi
, cLo
;
27759 dHi
= dLo
= cHi
= cLo
= IRTemp_INVALID
;
27760 breakupV256toV128s( dataV
, &dHi
, &dLo
);
27761 breakupV256toV128s( ctrlV
, &cHi
, &cLo
);
27762 IRTemp rHi
= math_PERMILPS_VAR_128( dHi
, cHi
);
27763 IRTemp rLo
= math_PERMILPS_VAR_128( dLo
, cLo
);
27764 IRTemp res
= newTemp(Ity_V256
);
27765 assign(res
, binop(Iop_V128HLtoV256
, mkexpr(rHi
), mkexpr(rLo
)));
27769 static IRTemp
math_PERMILPD_VAR_128 ( IRTemp dataV
, IRTemp ctrlV
)
27771 /* No cleverness here .. */
27772 IRTemp dHi
, dLo
, cHi
, cLo
;
27773 dHi
= dLo
= cHi
= cLo
= IRTemp_INVALID
;
27774 breakupV128to64s( dataV
, &dHi
, &dLo
);
27775 breakupV128to64s( ctrlV
, &cHi
, &cLo
);
27777 = IRExpr_ITE( unop(Iop_64to1
,
27778 binop(Iop_Shr64
, mkexpr(cHi
), mkU8(1))),
27779 mkexpr(dHi
), mkexpr(dLo
) );
27781 = IRExpr_ITE( unop(Iop_64to1
,
27782 binop(Iop_Shr64
, mkexpr(cLo
), mkU8(1))),
27783 mkexpr(dHi
), mkexpr(dLo
) );
27784 IRTemp res
= newTemp(Ity_V128
);
27785 assign(res
, binop(Iop_64HLtoV128
, rHi
, rLo
));
27789 static IRTemp
math_PERMILPD_VAR_256 ( IRTemp dataV
, IRTemp ctrlV
)
27791 IRTemp dHi
, dLo
, cHi
, cLo
;
27792 dHi
= dLo
= cHi
= cLo
= IRTemp_INVALID
;
27793 breakupV256toV128s( dataV
, &dHi
, &dLo
);
27794 breakupV256toV128s( ctrlV
, &cHi
, &cLo
);
27795 IRTemp rHi
= math_PERMILPD_VAR_128( dHi
, cHi
);
27796 IRTemp rLo
= math_PERMILPD_VAR_128( dLo
, cLo
);
27797 IRTemp res
= newTemp(Ity_V256
);
27798 assign(res
, binop(Iop_V128HLtoV256
, mkexpr(rHi
), mkexpr(rLo
)));
27802 static IRTemp
math_VPERMD ( IRTemp ctrlV
, IRTemp dataV
)
27804 /* In the control vector, zero out all but the bottom three bits of
27805 each 32-bit lane. */
27806 IRExpr
* cv1
= binop(Iop_ShrN32x8
,
27807 binop(Iop_ShlN32x8
, mkexpr(ctrlV
), mkU8(29)),
27809 /* And use the resulting cleaned-up control vector as steering
27810 in a Perm operation. */
27811 IRTemp res
= newTemp(Ity_V256
);
27812 assign(res
, binop(Iop_Perm32x8
, mkexpr(dataV
), cv1
));
27816 static Long
dis_SHIFTX ( /*OUT*/Bool
* uses_vvvv
,
27817 const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
,
27818 const HChar
* opname
, IROp op8
)
27822 Int size
= getRexW(pfx
) ? 8 : 4;
27823 IRType ty
= szToITy(size
);
27824 IRTemp src
= newTemp(ty
);
27825 IRTemp amt
= newTemp(ty
);
27826 UChar rm
= getUChar(delta
);
27828 assign( amt
, getIRegV(size
,pfx
) );
27829 if (epartIsReg(rm
)) {
27830 assign( src
, getIRegE(size
,pfx
,rm
) );
27831 DIP("%s %s,%s,%s\n", opname
, nameIRegV(size
,pfx
),
27832 nameIRegE(size
,pfx
,rm
), nameIRegG(size
,pfx
,rm
));
27835 IRTemp addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
27836 assign( src
, loadLE(ty
, mkexpr(addr
)) );
27837 DIP("%s %s,%s,%s\n", opname
, nameIRegV(size
,pfx
), dis_buf
,
27838 nameIRegG(size
,pfx
,rm
));
27842 putIRegG( size
, pfx
, rm
,
27843 binop(mkSizedOp(ty
,op8
), mkexpr(src
),
27844 narrowTo(Ity_I8
, binop(mkSizedOp(ty
,Iop_And8
), mkexpr(amt
),
27845 mkU(ty
,8*size
-1)))) );
27846 /* Flags aren't modified. */
27852 static Long
dis_FMA ( const VexAbiInfo
* vbi
, Prefix pfx
, Long delta
, UChar opc
)
27854 UChar modrm
= getUChar(delta
);
27855 UInt rG
= gregOfRexRM(pfx
, modrm
);
27856 UInt rV
= getVexNvvvv(pfx
);
27857 Bool scalar
= (opc
& 0xF) > 7 && (opc
& 1);
27858 IRType ty
= getRexW(pfx
) ? Ity_F64
: Ity_F32
;
27859 IRType vty
= scalar
? ty
: (getVexL(pfx
) ? Ity_V256
: Ity_V128
);
27860 IRTemp addr
= IRTemp_INVALID
;
27864 const HChar
*suffix
;
27865 const HChar
*order
;
27866 Bool negateRes
= False
;
27867 Bool negateZeven
= False
;
27868 Bool negateZodd
= False
;
27871 switch (opc
& 0xF) {
27872 case 0x6: name
= "addsub"; negateZeven
= True
; break;
27873 case 0x7: name
= "subadd"; negateZodd
= True
; break;
27875 case 0x9: name
= "add"; break;
27877 case 0xB: name
= "sub"; negateZeven
= True
; negateZodd
= True
;
27880 case 0xD: name
= "add"; negateRes
= True
; negateZeven
= True
;
27881 negateZodd
= True
; break;
27883 case 0xF: name
= "sub"; negateRes
= True
; break;
27884 default: vpanic("dis_FMA(amd64)"); break;
27886 switch (opc
& 0xF0) {
27887 case 0x90: order
= "132"; break;
27888 case 0xA0: order
= "213"; break;
27889 case 0xB0: order
= "231"; break;
27890 default: vpanic("dis_FMA(amd64)"); break;
27893 suffix
= ty
== Ity_F64
? "sd" : "ss";
27895 suffix
= ty
== Ity_F64
? "pd" : "ps";
27898 // Figure out |count| (the number of elements) by considering |vty| and |ty|.
27899 count
= sizeofIRType(vty
) / sizeofIRType(ty
);
27900 vassert(count
== 1 || count
== 2 || count
== 4 || count
== 8);
27902 // Fetch operands into the first |count| elements of |sX|, |sY| and |sZ|.
27904 IRExpr
*sX
[8], *sY
[8], *sZ
[8], *res
[8];
27905 for (i
= 0; i
< 8; i
++) sX
[i
] = sY
[i
] = sZ
[i
] = res
[i
] = NULL
;
27907 IRExpr
* (*getYMMRegLane
)(UInt
,Int
)
27908 = ty
== Ity_F32
? getYMMRegLane32F
: getYMMRegLane64F
;
27909 void (*putYMMRegLane
)(UInt
,Int
,IRExpr
*)
27910 = ty
== Ity_F32
? putYMMRegLane32F
: putYMMRegLane64F
;
27912 for (i
= 0; i
< count
; i
++) {
27913 sX
[i
] = getYMMRegLane(rG
, i
);
27914 sZ
[i
] = getYMMRegLane(rV
, i
);
27917 if (epartIsReg(modrm
)) {
27918 UInt rE
= eregOfRexRM(pfx
, modrm
);
27920 for (i
= 0; i
< count
; i
++) {
27921 sY
[i
] = getYMMRegLane(rE
, i
);
27923 if (vty
== Ity_V256
) {
27924 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes
? "n" : "",
27925 name
, order
, suffix
, nameYMMReg(rE
), nameYMMReg(rV
),
27928 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes
? "n" : "",
27929 name
, order
, suffix
, nameXMMReg(rE
), nameXMMReg(rV
),
27933 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
27935 for (i
= 0; i
< count
; i
++) {
27936 sY
[i
] = loadLE(ty
, binop(Iop_Add64
, mkexpr(addr
),
27937 mkU64(i
* sizeofIRType(ty
))));
27939 if (vty
== Ity_V256
) {
27940 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes
? "n" : "",
27941 name
, order
, suffix
, dis_buf
, nameYMMReg(rV
),
27944 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes
? "n" : "",
27945 name
, order
, suffix
, dis_buf
, nameXMMReg(rV
),
27950 /* vX/vY/vZ are now in 132 order. If the instruction requires a different
27951 order, swap them around. */
27953 # define COPY_ARR(_dst, _src) \
27954 do { for (int j = 0; j < 8; j++) { _dst[j] = _src[j]; } } while (0)
27956 if ((opc
& 0xF0) != 0x90) {
27958 COPY_ARR(temp
, sX
);
27959 if ((opc
& 0xF0) == 0xA0) {
27962 COPY_ARR(sY
, temp
);
27965 COPY_ARR(sZ
, temp
);
27971 for (i
= 0; i
< count
; i
++) {
27972 IROp opNEG
= ty
== Ity_F64
? Iop_NegF64
: Iop_NegF32
;
27973 if ((i
& 1) ? negateZodd
: negateZeven
) {
27974 sZ
[i
] = unop(opNEG
, sZ
[i
]);
27976 res
[i
] = IRExpr_Qop(ty
== Ity_F64
? Iop_MAddF64
: Iop_MAddF32
,
27977 get_FAKE_roundingmode(), sX
[i
], sY
[i
], sZ
[i
]);
27979 res
[i
] = unop(opNEG
, res
[i
]);
27983 for (i
= 0; i
< count
; i
++) {
27984 putYMMRegLane(rG
, i
, res
[i
]);
27988 case Ity_F32
: putYMMRegLane32(rG
, 1, mkU32(0)); /*fallthru*/
27989 case Ity_F64
: putYMMRegLane64(rG
, 1, mkU64(0)); /*fallthru*/
27990 case Ity_V128
: putYMMRegLane128(rG
, 1, mkV128(0)); /*fallthru*/
27991 case Ity_V256
: break;
27992 default: vassert(0);
27999 /* Masked load or masked store. */
28000 static ULong
dis_VMASKMOV ( Bool
*uses_vvvv
, const VexAbiInfo
* vbi
,
28001 Prefix pfx
, Long delta
,
28002 const HChar
* opname
, Bool isYMM
, IRType ty
,
28008 UChar modrm
= getUChar(delta
);
28009 UInt rG
= gregOfRexRM(pfx
,modrm
);
28010 UInt rV
= getVexNvvvv(pfx
);
28012 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28015 /**/ if (isLoad
&& isYMM
) {
28016 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
) );
28018 else if (isLoad
&& !isYMM
) {
28019 DIP("%s %s,%s,%s\n", opname
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
28022 else if (!isLoad
&& isYMM
) {
28023 DIP("%s %s,%s,%s\n", opname
, nameYMMReg(rG
), nameYMMReg(rV
), dis_buf
);
28026 vassert(!isLoad
&& !isYMM
);
28027 DIP("%s %s,%s,%s\n", opname
, nameXMMReg(rG
), nameXMMReg(rV
), dis_buf
);
28030 vassert(ty
== Ity_I32
|| ty
== Ity_I64
);
28031 Bool laneIs32
= ty
== Ity_I32
;
28033 Int nLanes
= (isYMM
? 2 : 1) * (laneIs32
? 4 : 2);
28035 for (i
= 0; i
< nLanes
; i
++) {
28036 IRExpr
* shAmt
= laneIs32
? mkU8(31) : mkU8(63);
28037 IRExpr
* one
= laneIs32
? mkU32(1) : mkU64(1);
28038 IROp opSHR
= laneIs32
? Iop_Shr32
: Iop_Shr64
;
28039 IROp opEQ
= laneIs32
? Iop_CmpEQ32
: Iop_CmpEQ64
;
28040 IRExpr
* lane
= (laneIs32
? getYMMRegLane32
: getYMMRegLane64
)( rV
, i
);
28042 IRTemp cond
= newTemp(Ity_I1
);
28043 assign(cond
, binop(opEQ
, binop(opSHR
, lane
, shAmt
), one
));
28045 IRTemp data
= newTemp(ty
);
28046 IRExpr
* ea
= binop(Iop_Add64
, mkexpr(addr
),
28047 mkU64(i
* (laneIs32
? 4 : 8)));
28051 Iend_LE
, laneIs32
? ILGop_Ident32
: ILGop_Ident64
,
28052 data
, ea
, laneIs32
? mkU32(0) : mkU64(0), mkexpr(cond
)
28054 (laneIs32
? putYMMRegLane32
: putYMMRegLane64
)( rG
, i
, mkexpr(data
) );
28056 assign(data
, (laneIs32
? getYMMRegLane32
: getYMMRegLane64
)( rG
, i
));
28057 stmt( IRStmt_StoreG(Iend_LE
, ea
, mkexpr(data
), mkexpr(cond
)) );
28061 if (isLoad
&& !isYMM
)
28062 putYMMRegLane128( rG
, 1, mkV128(0) );
28070 static ULong
dis_VGATHER ( Bool
*uses_vvvv
, const VexAbiInfo
* vbi
,
28071 Prefix pfx
, Long delta
,
28072 const HChar
* opname
, Bool isYMM
,
28073 Bool isVM64x
, IRType ty
)
28076 Int alen
, i
, vscale
, count1
, count2
;
28078 UChar modrm
= getUChar(delta
);
28079 UInt rG
= gregOfRexRM(pfx
,modrm
);
28080 UInt rV
= getVexNvvvv(pfx
);
28082 IRType dstTy
= (isYMM
&& (ty
== Ity_I64
|| !isVM64x
)) ? Ity_V256
: Ity_V128
;
28083 IRType idxTy
= (isYMM
&& (ty
== Ity_I32
|| isVM64x
)) ? Ity_V256
: Ity_V128
;
28085 addr
= disAVSIBMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, &rI
,
28087 if (addr
== IRTemp_INVALID
|| rI
== rG
|| rI
== rV
|| rG
== rV
)
28089 if (dstTy
== Ity_V256
) {
28090 DIP("%s %s,%s,%s\n", opname
, nameYMMReg(rV
), dis_buf
, nameYMMReg(rG
) );
28092 DIP("%s %s,%s,%s\n", opname
, nameXMMReg(rV
), dis_buf
, nameXMMReg(rG
) );
28096 if (ty
== Ity_I32
) {
28097 count1
= isYMM
? 8 : 4;
28098 count2
= isVM64x
? count1
/ 2 : count1
;
28100 count1
= count2
= isYMM
? 4 : 2;
28103 /* First update the mask register to copies of the sign bit. */
28104 if (ty
== Ity_I32
) {
28106 putYMMReg( rV
, binop(Iop_SarN32x8
, getYMMReg( rV
), mkU8(31)) );
28108 putYMMRegLoAndZU( rV
, binop(Iop_SarN32x4
, getXMMReg( rV
), mkU8(31)) );
28110 for (i
= 0; i
< count1
; i
++) {
28111 putYMMRegLane64( rV
, i
, binop(Iop_Sar64
, getYMMRegLane64( rV
, i
),
28116 /* Next gather the individual elements. If any fault occurs, the
28117 corresponding mask element will be set and the loop stops. */
28118 for (i
= 0; i
< count2
; i
++) {
28119 IRExpr
*expr
, *addr_expr
;
28120 cond
= newTemp(Ity_I1
);
28122 binop(ty
== Ity_I32
? Iop_CmpLT32S
: Iop_CmpLT64S
,
28123 ty
== Ity_I32
? getYMMRegLane32( rV
, i
)
28124 : getYMMRegLane64( rV
, i
),
28126 expr
= ty
== Ity_I32
? getYMMRegLane32( rG
, i
)
28127 : getYMMRegLane64( rG
, i
);
28128 addr_expr
= isVM64x
? getYMMRegLane64( rI
, i
)
28129 : unop(Iop_32Sto64
, getYMMRegLane32( rI
, i
));
28131 case 2: addr_expr
= binop(Iop_Shl64
, addr_expr
, mkU8(1)); break;
28132 case 4: addr_expr
= binop(Iop_Shl64
, addr_expr
, mkU8(2)); break;
28133 case 8: addr_expr
= binop(Iop_Shl64
, addr_expr
, mkU8(3)); break;
28136 addr_expr
= binop(Iop_Add64
, mkexpr(addr
), addr_expr
);
28137 addr_expr
= handleAddrOverrides(vbi
, pfx
, addr_expr
);
28138 addr_expr
= IRExpr_ITE(mkexpr(cond
), addr_expr
, getIReg64(R_RSP
));
28139 expr
= IRExpr_ITE(mkexpr(cond
), loadLE(ty
, addr_expr
), expr
);
28140 if (ty
== Ity_I32
) {
28141 putYMMRegLane32( rG
, i
, expr
);
28142 putYMMRegLane32( rV
, i
, mkU32(0) );
28144 putYMMRegLane64( rG
, i
, expr
);
28145 putYMMRegLane64( rV
, i
, mkU64(0) );
28149 if (!isYMM
|| (ty
== Ity_I32
&& isVM64x
)) {
28150 if (ty
== Ity_I64
|| isYMM
)
28151 putYMMRegLane128( rV
, 1, mkV128(0) );
28152 else if (ty
== Ity_I32
&& count2
== 2) {
28153 putYMMRegLane64( rV
, 1, mkU64(0) );
28154 putYMMRegLane64( rG
, 1, mkU64(0) );
28156 putYMMRegLane128( rG
, 1, mkV128(0) );
28164 __attribute__((noinline
))
28166 Long
dis_ESC_0F38__VEX (
28167 /*MB_OUT*/DisResult
* dres
,
28168 /*OUT*/ Bool
* uses_vvvv
,
28169 const VexArchInfo
* archinfo
,
28170 const VexAbiInfo
* vbi
,
28171 Prefix pfx
, Int sz
, Long deltaIN
28174 IRTemp addr
= IRTemp_INVALID
;
28177 Long delta
= deltaIN
;
28178 UChar opc
= getUChar(delta
);
28180 *uses_vvvv
= False
;
28185 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
28186 /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */
28187 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28188 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28189 uses_vvvv
, vbi
, pfx
, delta
, "vpshufb", math_PSHUFB_XMM
);
28190 goto decode_success
;
28192 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
28193 /* VPSHUFB = VEX.NDS.256.66.0F38.WIG 00 /r */
28194 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28195 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28196 uses_vvvv
, vbi
, pfx
, delta
, "vpshufb", math_PSHUFB_YMM
);
28197 goto decode_success
;
28204 /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */
28205 /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */
28206 /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */
28207 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28208 delta
= dis_PHADD_128( vbi
, pfx
, delta
, True
/*isAvx*/, opc
);
28210 goto decode_success
;
28212 /* VPHADDW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 01 /r */
28213 /* VPHADDD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 02 /r */
28214 /* VPHADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 03 /r */
28215 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28216 delta
= dis_PHADD_256( vbi
, pfx
, delta
, opc
);
28218 goto decode_success
;
28223 /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */
28224 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28225 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28226 uses_vvvv
, vbi
, pfx
, delta
, "vpmaddubsw",
28227 math_PMADDUBSW_128
);
28228 goto decode_success
;
28230 /* VPMADDUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 04 /r */
28231 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28232 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28233 uses_vvvv
, vbi
, pfx
, delta
, "vpmaddubsw",
28234 math_PMADDUBSW_256
);
28235 goto decode_success
;
28242 /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */
28243 /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */
28244 /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */
28245 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28246 delta
= dis_PHADD_128( vbi
, pfx
, delta
, True
/*isAvx*/, opc
);
28248 goto decode_success
;
28250 /* VPHSUBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 05 /r */
28251 /* VPHSUBD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 06 /r */
28252 /* VPHSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 07 /r */
28253 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28254 delta
= dis_PHADD_256( vbi
, pfx
, delta
, opc
);
28256 goto decode_success
;
28263 /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */
28264 /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */
28265 /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */
28266 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28267 IRTemp sV
= newTemp(Ity_V128
);
28268 IRTemp dV
= newTemp(Ity_V128
);
28269 IRTemp sHi
, sLo
, dHi
, dLo
;
28270 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
28273 UChar modrm
= getUChar(delta
);
28274 UInt rG
= gregOfRexRM(pfx
,modrm
);
28275 UInt rV
= getVexNvvvv(pfx
);
28278 case 0x08: laneszB
= 1; ch
= 'b'; break;
28279 case 0x09: laneszB
= 2; ch
= 'w'; break;
28280 case 0x0A: laneszB
= 4; ch
= 'd'; break;
28281 default: vassert(0);
28284 assign( dV
, getXMMReg(rV
) );
28286 if (epartIsReg(modrm
)) {
28287 UInt rE
= eregOfRexRM(pfx
,modrm
);
28288 assign( sV
, getXMMReg(rE
) );
28290 DIP("vpsign%c %s,%s,%s\n", ch
, nameXMMReg(rE
),
28291 nameXMMReg(rV
), nameXMMReg(rG
));
28293 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28294 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
28296 DIP("vpsign%c %s,%s,%s\n", ch
, dis_buf
,
28297 nameXMMReg(rV
), nameXMMReg(rG
));
28300 breakupV128to64s( dV
, &dHi
, &dLo
);
28301 breakupV128to64s( sV
, &sHi
, &sLo
);
28305 binop(Iop_64HLtoV128
,
28306 dis_PSIGN_helper( mkexpr(sHi
), mkexpr(dHi
), laneszB
),
28307 dis_PSIGN_helper( mkexpr(sLo
), mkexpr(dLo
), laneszB
)
28311 goto decode_success
;
28313 /* VPSIGNB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 08 /r */
28314 /* VPSIGNW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 09 /r */
28315 /* VPSIGND ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0A /r */
28316 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28317 IRTemp sV
= newTemp(Ity_V256
);
28318 IRTemp dV
= newTemp(Ity_V256
);
28319 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
28320 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
28321 d3
= d2
= d1
= d0
= IRTemp_INVALID
;
28324 UChar modrm
= getUChar(delta
);
28325 UInt rG
= gregOfRexRM(pfx
,modrm
);
28326 UInt rV
= getVexNvvvv(pfx
);
28329 case 0x08: laneszB
= 1; ch
= 'b'; break;
28330 case 0x09: laneszB
= 2; ch
= 'w'; break;
28331 case 0x0A: laneszB
= 4; ch
= 'd'; break;
28332 default: vassert(0);
28335 assign( dV
, getYMMReg(rV
) );
28337 if (epartIsReg(modrm
)) {
28338 UInt rE
= eregOfRexRM(pfx
,modrm
);
28339 assign( sV
, getYMMReg(rE
) );
28341 DIP("vpsign%c %s,%s,%s\n", ch
, nameYMMReg(rE
),
28342 nameYMMReg(rV
), nameYMMReg(rG
));
28344 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28345 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
28347 DIP("vpsign%c %s,%s,%s\n", ch
, dis_buf
,
28348 nameYMMReg(rV
), nameYMMReg(rG
));
28351 breakupV256to64s( dV
, &d3
, &d2
, &d1
, &d0
);
28352 breakupV256to64s( sV
, &s3
, &s2
, &s1
, &s0
);
28356 binop( Iop_V128HLtoV256
,
28357 binop(Iop_64HLtoV128
,
28358 dis_PSIGN_helper( mkexpr(s3
), mkexpr(d3
), laneszB
),
28359 dis_PSIGN_helper( mkexpr(s2
), mkexpr(d2
), laneszB
)
28361 binop(Iop_64HLtoV128
,
28362 dis_PSIGN_helper( mkexpr(s1
), mkexpr(d1
), laneszB
),
28363 dis_PSIGN_helper( mkexpr(s0
), mkexpr(d0
), laneszB
)
28368 goto decode_success
;
28373 /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */
28374 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28375 IRTemp sV
= newTemp(Ity_V128
);
28376 IRTemp dV
= newTemp(Ity_V128
);
28377 IRTemp sHi
, sLo
, dHi
, dLo
;
28378 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
28379 UChar modrm
= getUChar(delta
);
28380 UInt rG
= gregOfRexRM(pfx
,modrm
);
28381 UInt rV
= getVexNvvvv(pfx
);
28383 assign( dV
, getXMMReg(rV
) );
28385 if (epartIsReg(modrm
)) {
28386 UInt rE
= eregOfRexRM(pfx
,modrm
);
28387 assign( sV
, getXMMReg(rE
) );
28389 DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE
),
28390 nameXMMReg(rV
), nameXMMReg(rG
));
28392 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28393 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
28395 DIP("vpmulhrsw %s,%s,%s\n", dis_buf
,
28396 nameXMMReg(rV
), nameXMMReg(rG
));
28399 breakupV128to64s( dV
, &dHi
, &dLo
);
28400 breakupV128to64s( sV
, &sHi
, &sLo
);
28404 binop(Iop_64HLtoV128
,
28405 dis_PMULHRSW_helper( mkexpr(sHi
), mkexpr(dHi
) ),
28406 dis_PMULHRSW_helper( mkexpr(sLo
), mkexpr(dLo
) )
28410 goto decode_success
;
28412 /* VPMULHRSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0B /r */
28413 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28414 IRTemp sV
= newTemp(Ity_V256
);
28415 IRTemp dV
= newTemp(Ity_V256
);
28416 IRTemp s3
, s2
, s1
, s0
, d3
, d2
, d1
, d0
;
28417 s3
= s2
= s1
= s0
= d3
= d2
= d1
= d0
= IRTemp_INVALID
;
28418 UChar modrm
= getUChar(delta
);
28419 UInt rG
= gregOfRexRM(pfx
,modrm
);
28420 UInt rV
= getVexNvvvv(pfx
);
28422 assign( dV
, getYMMReg(rV
) );
28424 if (epartIsReg(modrm
)) {
28425 UInt rE
= eregOfRexRM(pfx
,modrm
);
28426 assign( sV
, getYMMReg(rE
) );
28428 DIP("vpmulhrsw %s,%s,%s\n", nameYMMReg(rE
),
28429 nameYMMReg(rV
), nameYMMReg(rG
));
28431 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28432 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
28434 DIP("vpmulhrsw %s,%s,%s\n", dis_buf
,
28435 nameYMMReg(rV
), nameYMMReg(rG
));
28438 breakupV256to64s( dV
, &d3
, &d2
, &d1
, &d0
);
28439 breakupV256to64s( sV
, &s3
, &s2
, &s1
, &s0
);
28443 binop(Iop_V128HLtoV256
,
28444 binop(Iop_64HLtoV128
,
28445 dis_PMULHRSW_helper( mkexpr(s3
), mkexpr(d3
) ),
28446 dis_PMULHRSW_helper( mkexpr(s2
), mkexpr(d2
) ) ),
28447 binop(Iop_64HLtoV128
,
28448 dis_PMULHRSW_helper( mkexpr(s1
), mkexpr(d1
) ),
28449 dis_PMULHRSW_helper( mkexpr(s0
), mkexpr(d0
) ) )
28453 dres
->hint
= Dis_HintVerbose
;
28454 goto decode_success
;
28459 /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */
28460 if (have66noF2noF3(pfx
)
28461 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
28462 UChar modrm
= getUChar(delta
);
28463 UInt rG
= gregOfRexRM(pfx
, modrm
);
28464 UInt rV
= getVexNvvvv(pfx
);
28465 IRTemp ctrlV
= newTemp(Ity_V128
);
28466 if (epartIsReg(modrm
)) {
28467 UInt rE
= eregOfRexRM(pfx
, modrm
);
28469 DIP("vpermilps %s,%s,%s\n",
28470 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
28471 assign(ctrlV
, getXMMReg(rE
));
28473 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28475 DIP("vpermilps %s,%s,%s\n",
28476 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
28477 assign(ctrlV
, loadLE(Ity_V128
, mkexpr(addr
)));
28479 IRTemp dataV
= newTemp(Ity_V128
);
28480 assign(dataV
, getXMMReg(rV
));
28481 IRTemp resV
= math_PERMILPS_VAR_128(dataV
, ctrlV
);
28482 putYMMRegLoAndZU(rG
, mkexpr(resV
));
28484 goto decode_success
;
28486 /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */
28487 if (have66noF2noF3(pfx
)
28488 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
28489 UChar modrm
= getUChar(delta
);
28490 UInt rG
= gregOfRexRM(pfx
, modrm
);
28491 UInt rV
= getVexNvvvv(pfx
);
28492 IRTemp ctrlV
= newTemp(Ity_V256
);
28493 if (epartIsReg(modrm
)) {
28494 UInt rE
= eregOfRexRM(pfx
, modrm
);
28496 DIP("vpermilps %s,%s,%s\n",
28497 nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
28498 assign(ctrlV
, getYMMReg(rE
));
28500 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28502 DIP("vpermilps %s,%s,%s\n",
28503 dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
28504 assign(ctrlV
, loadLE(Ity_V256
, mkexpr(addr
)));
28506 IRTemp dataV
= newTemp(Ity_V256
);
28507 assign(dataV
, getYMMReg(rV
));
28508 IRTemp resV
= math_PERMILPS_VAR_256(dataV
, ctrlV
);
28509 putYMMReg(rG
, mkexpr(resV
));
28511 goto decode_success
;
28516 /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */
28517 if (have66noF2noF3(pfx
)
28518 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
28519 UChar modrm
= getUChar(delta
);
28520 UInt rG
= gregOfRexRM(pfx
, modrm
);
28521 UInt rV
= getVexNvvvv(pfx
);
28522 IRTemp ctrlV
= newTemp(Ity_V128
);
28523 if (epartIsReg(modrm
)) {
28524 UInt rE
= eregOfRexRM(pfx
, modrm
);
28526 DIP("vpermilpd %s,%s,%s\n",
28527 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
28528 assign(ctrlV
, getXMMReg(rE
));
28530 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28532 DIP("vpermilpd %s,%s,%s\n",
28533 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
28534 assign(ctrlV
, loadLE(Ity_V128
, mkexpr(addr
)));
28536 IRTemp dataV
= newTemp(Ity_V128
);
28537 assign(dataV
, getXMMReg(rV
));
28538 IRTemp resV
= math_PERMILPD_VAR_128(dataV
, ctrlV
);
28539 putYMMRegLoAndZU(rG
, mkexpr(resV
));
28541 goto decode_success
;
28543 /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */
28544 if (have66noF2noF3(pfx
)
28545 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
28546 UChar modrm
= getUChar(delta
);
28547 UInt rG
= gregOfRexRM(pfx
, modrm
);
28548 UInt rV
= getVexNvvvv(pfx
);
28549 IRTemp ctrlV
= newTemp(Ity_V256
);
28550 if (epartIsReg(modrm
)) {
28551 UInt rE
= eregOfRexRM(pfx
, modrm
);
28553 DIP("vpermilpd %s,%s,%s\n",
28554 nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
28555 assign(ctrlV
, getYMMReg(rE
));
28557 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28559 DIP("vpermilpd %s,%s,%s\n",
28560 dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
28561 assign(ctrlV
, loadLE(Ity_V256
, mkexpr(addr
)));
28563 IRTemp dataV
= newTemp(Ity_V256
);
28564 assign(dataV
, getYMMReg(rV
));
28565 IRTemp resV
= math_PERMILPD_VAR_256(dataV
, ctrlV
);
28566 putYMMReg(rG
, mkexpr(resV
));
28568 goto decode_success
;
28573 /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */
28574 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28575 delta
= dis_xTESTy_128( vbi
, pfx
, delta
, True
/*isAvx*/, 32 );
28576 goto decode_success
;
28578 /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */
28579 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28580 delta
= dis_xTESTy_256( vbi
, pfx
, delta
, 32 );
28581 goto decode_success
;
28586 /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */
28587 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28588 delta
= dis_xTESTy_128( vbi
, pfx
, delta
, True
/*isAvx*/, 64 );
28589 goto decode_success
;
28591 /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */
28592 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28593 delta
= dis_xTESTy_256( vbi
, pfx
, delta
, 64 );
28594 goto decode_success
;
28599 /* VCVTPH2PS xmm2/m64, xmm1 = VEX.128.66.0F38.W0 13 /r */
28600 if (have66noF2noF3(pfx
)
28601 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/
28602 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_F16C
)) {
28603 delta
= dis_VCVTPH2PS( vbi
, pfx
, delta
, /*is256bit=*/False
);
28604 goto decode_success
;
28606 /* VCVTPH2PS xmm2/m128, xmm1 = VEX.256.66.0F38.W0 13 /r */
28607 if (have66noF2noF3(pfx
)
28608 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/
28609 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_F16C
)) {
28610 delta
= dis_VCVTPH2PS( vbi
, pfx
, delta
, /*is256bit=*/True
);
28611 goto decode_success
;
28616 /* VPERMPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 16 /r */
28617 if (have66noF2noF3(pfx
)
28618 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
28619 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28620 uses_vvvv
, vbi
, pfx
, delta
, "vpermps", math_VPERMD
);
28621 goto decode_success
;
28626 /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */
28627 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28628 delta
= dis_xTESTy_128( vbi
, pfx
, delta
, True
/*isAvx*/, 0 );
28629 goto decode_success
;
28631 /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */
28632 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28633 delta
= dis_xTESTy_256( vbi
, pfx
, delta
, 0 );
28634 goto decode_success
;
28639 /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28640 if (have66noF2noF3(pfx
)
28641 && 0==getVexL(pfx
)/*128*/
28642 && !epartIsReg(getUChar(delta
))) {
28643 UChar modrm
= getUChar(delta
);
28644 UInt rG
= gregOfRexRM(pfx
, modrm
);
28645 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28647 DIP("vbroadcastss %s,%s\n", dis_buf
, nameXMMReg(rG
));
28648 IRTemp t32
= newTemp(Ity_I32
);
28649 assign(t32
, loadLE(Ity_I32
, mkexpr(addr
)));
28650 IRTemp t64
= newTemp(Ity_I64
);
28651 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
28652 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
28653 putYMMRegLoAndZU(rG
, res
);
28654 goto decode_success
;
28656 /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28657 if (have66noF2noF3(pfx
)
28658 && 1==getVexL(pfx
)/*256*/
28659 && !epartIsReg(getUChar(delta
))) {
28660 UChar modrm
= getUChar(delta
);
28661 UInt rG
= gregOfRexRM(pfx
, modrm
);
28662 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28664 DIP("vbroadcastss %s,%s\n", dis_buf
, nameYMMReg(rG
));
28665 IRTemp t32
= newTemp(Ity_I32
);
28666 assign(t32
, loadLE(Ity_I32
, mkexpr(addr
)));
28667 IRTemp t64
= newTemp(Ity_I64
);
28668 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
28669 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
28670 mkexpr(t64
), mkexpr(t64
));
28671 putYMMReg(rG
, res
);
28672 goto decode_success
;
28674 /* VBROADCASTSS xmm2, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28675 if (have66noF2noF3(pfx
)
28676 && 0==getVexL(pfx
)/*128*/
28677 && epartIsReg(getUChar(delta
))) {
28678 UChar modrm
= getUChar(delta
);
28679 UInt rG
= gregOfRexRM(pfx
, modrm
);
28680 UInt rE
= eregOfRexRM(pfx
, modrm
);
28681 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
28682 IRTemp t32
= newTemp(Ity_I32
);
28683 assign(t32
, getXMMRegLane32(rE
, 0));
28684 IRTemp t64
= newTemp(Ity_I64
);
28685 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
28686 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
28687 putYMMRegLoAndZU(rG
, res
);
28689 goto decode_success
;
28691 /* VBROADCASTSS xmm2, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28692 if (have66noF2noF3(pfx
)
28693 && 1==getVexL(pfx
)/*256*/
28694 && epartIsReg(getUChar(delta
))) {
28695 UChar modrm
= getUChar(delta
);
28696 UInt rG
= gregOfRexRM(pfx
, modrm
);
28697 UInt rE
= eregOfRexRM(pfx
, modrm
);
28698 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
28699 IRTemp t32
= newTemp(Ity_I32
);
28700 assign(t32
, getXMMRegLane32(rE
, 0));
28701 IRTemp t64
= newTemp(Ity_I64
);
28702 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
28703 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
28704 mkexpr(t64
), mkexpr(t64
));
28705 putYMMReg(rG
, res
);
28707 goto decode_success
;
28712 /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28713 if (have66noF2noF3(pfx
)
28714 && 1==getVexL(pfx
)/*256*/
28715 && !epartIsReg(getUChar(delta
))) {
28716 UChar modrm
= getUChar(delta
);
28717 UInt rG
= gregOfRexRM(pfx
, modrm
);
28718 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28720 DIP("vbroadcastsd %s,%s\n", dis_buf
, nameYMMReg(rG
));
28721 IRTemp t64
= newTemp(Ity_I64
);
28722 assign(t64
, loadLE(Ity_I64
, mkexpr(addr
)));
28723 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
28724 mkexpr(t64
), mkexpr(t64
));
28725 putYMMReg(rG
, res
);
28726 goto decode_success
;
28728 /* VBROADCASTSD xmm2, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28729 if (have66noF2noF3(pfx
)
28730 && 1==getVexL(pfx
)/*256*/
28731 && epartIsReg(getUChar(delta
))) {
28732 UChar modrm
= getUChar(delta
);
28733 UInt rG
= gregOfRexRM(pfx
, modrm
);
28734 UInt rE
= eregOfRexRM(pfx
, modrm
);
28735 DIP("vbroadcastsd %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
28736 IRTemp t64
= newTemp(Ity_I64
);
28737 assign(t64
, getXMMRegLane64(rE
, 0));
28738 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
28739 mkexpr(t64
), mkexpr(t64
));
28740 putYMMReg(rG
, res
);
28742 goto decode_success
;
28747 /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */
28748 if (have66noF2noF3(pfx
)
28749 && 1==getVexL(pfx
)/*256*/
28750 && !epartIsReg(getUChar(delta
))) {
28751 UChar modrm
= getUChar(delta
);
28752 UInt rG
= gregOfRexRM(pfx
, modrm
);
28753 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28755 DIP("vbroadcastf128 %s,%s\n", dis_buf
, nameYMMReg(rG
));
28756 IRTemp t128
= newTemp(Ity_V128
);
28757 assign(t128
, loadLE(Ity_V128
, mkexpr(addr
)));
28758 putYMMReg( rG
, binop(Iop_V128HLtoV256
, mkexpr(t128
), mkexpr(t128
)) );
28759 goto decode_success
;
28764 /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */
28765 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28766 delta
= dis_AVX128_E_to_G_unary(
28767 uses_vvvv
, vbi
, pfx
, delta
,
28768 "vpabsb", math_PABS_XMM_pap1
);
28769 goto decode_success
;
28771 /* VPABSB ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1C /r */
28772 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28773 delta
= dis_AVX256_E_to_G_unary(
28774 uses_vvvv
, vbi
, pfx
, delta
,
28775 "vpabsb", math_PABS_YMM_pap1
);
28776 goto decode_success
;
28781 /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */
28782 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28783 delta
= dis_AVX128_E_to_G_unary(
28784 uses_vvvv
, vbi
, pfx
, delta
,
28785 "vpabsw", math_PABS_XMM_pap2
);
28786 goto decode_success
;
28788 /* VPABSW ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1D /r */
28789 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28790 delta
= dis_AVX256_E_to_G_unary(
28791 uses_vvvv
, vbi
, pfx
, delta
,
28792 "vpabsw", math_PABS_YMM_pap2
);
28793 goto decode_success
;
28798 /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */
28799 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28800 delta
= dis_AVX128_E_to_G_unary(
28801 uses_vvvv
, vbi
, pfx
, delta
,
28802 "vpabsd", math_PABS_XMM_pap4
);
28803 goto decode_success
;
28805 /* VPABSD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1E /r */
28806 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28807 delta
= dis_AVX256_E_to_G_unary(
28808 uses_vvvv
, vbi
, pfx
, delta
,
28809 "vpabsd", math_PABS_YMM_pap4
);
28810 goto decode_success
;
28815 /* VPMOVSXBW xmm2/m64, xmm1 */
28816 /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */
28817 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28818 delta
= dis_PMOVxXBW_128( vbi
, pfx
, delta
,
28819 True
/*isAvx*/, False
/*!xIsZ*/ );
28820 goto decode_success
;
28822 /* VPMOVSXBW xmm2/m128, ymm1 */
28823 /* VPMOVSXBW = VEX.256.66.0F38.WIG 20 /r */
28824 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28825 delta
= dis_PMOVxXBW_256( vbi
, pfx
, delta
, False
/*!xIsZ*/ );
28826 goto decode_success
;
28831 /* VPMOVSXBD xmm2/m32, xmm1 */
28832 /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */
28833 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28834 delta
= dis_PMOVxXBD_128( vbi
, pfx
, delta
,
28835 True
/*isAvx*/, False
/*!xIsZ*/ );
28836 goto decode_success
;
28838 /* VPMOVSXBD xmm2/m64, ymm1 */
28839 /* VPMOVSXBD = VEX.256.66.0F38.WIG 21 /r */
28840 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28841 delta
= dis_PMOVxXBD_256( vbi
, pfx
, delta
, False
/*!xIsZ*/ );
28842 goto decode_success
;
28847 /* VPMOVSXBQ xmm2/m16, xmm1 */
28848 /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */
28849 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28850 delta
= dis_PMOVSXBQ_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
28851 goto decode_success
;
28853 /* VPMOVSXBQ xmm2/m32, ymm1 */
28854 /* VPMOVSXBQ = VEX.256.66.0F38.WIG 22 /r */
28855 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28856 delta
= dis_PMOVSXBQ_256( vbi
, pfx
, delta
);
28857 goto decode_success
;
28862 /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */
28863 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28864 delta
= dis_PMOVxXWD_128( vbi
, pfx
, delta
,
28865 True
/*isAvx*/, False
/*!xIsZ*/ );
28866 goto decode_success
;
28868 /* VPMOVSXWD xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 23 /r */
28869 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28870 delta
= dis_PMOVxXWD_256( vbi
, pfx
, delta
, False
/*!xIsZ*/ );
28871 goto decode_success
;
28876 /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */
28877 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28878 delta
= dis_PMOVSXWQ_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
28879 goto decode_success
;
28881 /* VPMOVSXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 24 /r */
28882 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28883 delta
= dis_PMOVSXWQ_256( vbi
, pfx
, delta
);
28884 goto decode_success
;
28889 /* VPMOVSXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 25 /r */
28890 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28891 delta
= dis_PMOVxXDQ_128( vbi
, pfx
, delta
,
28892 True
/*isAvx*/, False
/*!xIsZ*/ );
28893 goto decode_success
;
28895 /* VPMOVSXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 25 /r */
28896 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28897 delta
= dis_PMOVxXDQ_256( vbi
, pfx
, delta
, False
/*!xIsZ*/ );
28898 goto decode_success
;
28903 /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */
28904 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28905 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28906 uses_vvvv
, vbi
, pfx
, delta
,
28907 "vpmuldq", math_PMULDQ_128
);
28908 goto decode_success
;
28910 /* VPMULDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 28 /r */
28911 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28912 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28913 uses_vvvv
, vbi
, pfx
, delta
,
28914 "vpmuldq", math_PMULDQ_256
);
28915 goto decode_success
;
28920 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28921 /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */
28922 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28923 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28924 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqq", Iop_CmpEQ64x2
);
28925 goto decode_success
;
28927 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28928 /* VPCMPEQQ = VEX.NDS.256.66.0F38.WIG 29 /r */
28929 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28930 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28931 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpeqq", Iop_CmpEQ64x4
);
28932 goto decode_success
;
28937 /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */
28938 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
28939 && !epartIsReg(getUChar(delta
))) {
28940 UChar modrm
= getUChar(delta
);
28941 UInt rD
= gregOfRexRM(pfx
, modrm
);
28942 IRTemp tD
= newTemp(Ity_V128
);
28943 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28945 gen_SIGNAL_if_not_16_aligned(vbi
, addr
);
28946 assign(tD
, loadLE(Ity_V128
, mkexpr(addr
)));
28947 DIP("vmovntdqa %s,%s\n", dis_buf
, nameXMMReg(rD
));
28948 putYMMRegLoAndZU(rD
, mkexpr(tD
));
28949 goto decode_success
;
28951 /* VMOVNTDQA m256, ymm1 = VEX.256.66.0F38.WIG 2A /r */
28952 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
28953 && !epartIsReg(getUChar(delta
))) {
28954 UChar modrm
= getUChar(delta
);
28955 UInt rD
= gregOfRexRM(pfx
, modrm
);
28956 IRTemp tD
= newTemp(Ity_V256
);
28957 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
28959 gen_SIGNAL_if_not_32_aligned(vbi
, addr
);
28960 assign(tD
, loadLE(Ity_V256
, mkexpr(addr
)));
28961 DIP("vmovntdqa %s,%s\n", dis_buf
, nameYMMReg(rD
));
28962 putYMMReg(rD
, mkexpr(tD
));
28963 goto decode_success
;
28968 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28969 /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */
28970 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
28971 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
28972 uses_vvvv
, vbi
, pfx
, delta
, "vpackusdw",
28973 Iop_QNarrowBin32Sto16Ux8
, NULL
,
28974 False
/*!invertLeftArg*/, True
/*swapArgs*/ );
28975 goto decode_success
;
28977 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28978 /* VPACKUSDW = VEX.NDS.256.66.0F38.WIG 2B /r */
28979 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
28980 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28981 uses_vvvv
, vbi
, pfx
, delta
, "vpackusdw",
28982 math_VPACKUSDW_YMM
);
28983 goto decode_success
;
28988 /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2C /r */
28989 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
28990 && 0==getRexW(pfx
)/*W0*/
28991 && !epartIsReg(getUChar(delta
))) {
28992 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovps",
28993 /*!isYMM*/False
, Ity_I32
, /*isLoad*/True
);
28994 goto decode_success
;
28996 /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2C /r */
28997 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
28998 && 0==getRexW(pfx
)/*W0*/
28999 && !epartIsReg(getUChar(delta
))) {
29000 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovps",
29001 /*isYMM*/True
, Ity_I32
, /*isLoad*/True
);
29002 goto decode_success
;
29007 /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2D /r */
29008 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29009 && 0==getRexW(pfx
)/*W0*/
29010 && !epartIsReg(getUChar(delta
))) {
29011 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovpd",
29012 /*!isYMM*/False
, Ity_I64
, /*isLoad*/True
);
29013 goto decode_success
;
29015 /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2D /r */
29016 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29017 && 0==getRexW(pfx
)/*W0*/
29018 && !epartIsReg(getUChar(delta
))) {
29019 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovpd",
29020 /*isYMM*/True
, Ity_I64
, /*isLoad*/True
);
29021 goto decode_success
;
29026 /* VMASKMOVPS xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2E /r */
29027 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29028 && 0==getRexW(pfx
)/*W0*/
29029 && !epartIsReg(getUChar(delta
))) {
29030 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovps",
29031 /*!isYMM*/False
, Ity_I32
, /*!isLoad*/False
);
29032 goto decode_success
;
29034 /* VMASKMOVPS ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2E /r */
29035 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29036 && 0==getRexW(pfx
)/*W0*/
29037 && !epartIsReg(getUChar(delta
))) {
29038 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovps",
29039 /*isYMM*/True
, Ity_I32
, /*!isLoad*/False
);
29040 goto decode_success
;
29045 /* VMASKMOVPD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2F /r */
29046 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29047 && 0==getRexW(pfx
)/*W0*/
29048 && !epartIsReg(getUChar(delta
))) {
29049 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovpd",
29050 /*!isYMM*/False
, Ity_I64
, /*!isLoad*/False
);
29051 goto decode_success
;
29053 /* VMASKMOVPD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2F /r */
29054 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29055 && 0==getRexW(pfx
)/*W0*/
29056 && !epartIsReg(getUChar(delta
))) {
29057 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vmaskmovpd",
29058 /*isYMM*/True
, Ity_I64
, /*!isLoad*/False
);
29059 goto decode_success
;
29064 /* VPMOVZXBW xmm2/m64, xmm1 */
29065 /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */
29066 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29067 delta
= dis_PMOVxXBW_128( vbi
, pfx
, delta
,
29068 True
/*isAvx*/, True
/*xIsZ*/ );
29069 goto decode_success
;
29071 /* VPMOVZXBW xmm2/m128, ymm1 */
29072 /* VPMOVZXBW = VEX.256.66.0F38.WIG 30 /r */
29073 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29074 delta
= dis_PMOVxXBW_256( vbi
, pfx
, delta
, True
/*xIsZ*/ );
29075 goto decode_success
;
29080 /* VPMOVZXBD xmm2/m32, xmm1 */
29081 /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */
29082 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29083 delta
= dis_PMOVxXBD_128( vbi
, pfx
, delta
,
29084 True
/*isAvx*/, True
/*xIsZ*/ );
29085 goto decode_success
;
29087 /* VPMOVZXBD xmm2/m64, ymm1 */
29088 /* VPMOVZXBD = VEX.256.66.0F38.WIG 31 /r */
29089 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29090 delta
= dis_PMOVxXBD_256( vbi
, pfx
, delta
, True
/*xIsZ*/ );
29091 goto decode_success
;
29096 /* VPMOVZXBQ xmm2/m16, xmm1 */
29097 /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */
29098 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29099 delta
= dis_PMOVZXBQ_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
29100 goto decode_success
;
29102 /* VPMOVZXBQ xmm2/m32, ymm1 */
29103 /* VPMOVZXBQ = VEX.256.66.0F38.WIG 32 /r */
29104 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29105 delta
= dis_PMOVZXBQ_256( vbi
, pfx
, delta
);
29106 goto decode_success
;
29111 /* VPMOVZXWD xmm2/m64, xmm1 */
29112 /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */
29113 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29114 delta
= dis_PMOVxXWD_128( vbi
, pfx
, delta
,
29115 True
/*isAvx*/, True
/*xIsZ*/ );
29116 goto decode_success
;
29118 /* VPMOVZXWD xmm2/m128, ymm1 */
29119 /* VPMOVZXWD = VEX.256.66.0F38.WIG 33 /r */
29120 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29121 delta
= dis_PMOVxXWD_256( vbi
, pfx
, delta
, True
/*xIsZ*/ );
29122 goto decode_success
;
29127 /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */
29128 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29129 delta
= dis_PMOVZXWQ_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
29130 goto decode_success
;
29132 /* VPMOVZXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 34 /r */
29133 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29134 delta
= dis_PMOVZXWQ_256( vbi
, pfx
, delta
);
29135 goto decode_success
;
29140 /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */
29141 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29142 delta
= dis_PMOVxXDQ_128( vbi
, pfx
, delta
,
29143 True
/*isAvx*/, True
/*xIsZ*/ );
29144 goto decode_success
;
29146 /* VPMOVZXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 35 /r */
29147 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29148 delta
= dis_PMOVxXDQ_256( vbi
, pfx
, delta
, True
/*xIsZ*/ );
29149 goto decode_success
;
29154 /* VPERMD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 36 /r */
29155 if (have66noF2noF3(pfx
)
29156 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
29157 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
29158 uses_vvvv
, vbi
, pfx
, delta
, "vpermd", math_VPERMD
);
29159 goto decode_success
;
29164 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29165 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */
29166 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29167 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29168 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtq", Iop_CmpGT64Sx2
);
29169 goto decode_success
;
29171 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29172 /* VPCMPGTQ = VEX.NDS.256.66.0F38.WIG 37 /r */
29173 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29174 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29175 uses_vvvv
, vbi
, pfx
, delta
, "vpcmpgtq", Iop_CmpGT64Sx4
);
29176 goto decode_success
;
29181 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29182 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */
29183 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29184 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29185 uses_vvvv
, vbi
, pfx
, delta
, "vpminsb", Iop_Min8Sx16
);
29186 goto decode_success
;
29188 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29189 /* VPMINSB = VEX.NDS.256.66.0F38.WIG 38 /r */
29190 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29191 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29192 uses_vvvv
, vbi
, pfx
, delta
, "vpminsb", Iop_Min8Sx32
);
29193 goto decode_success
;
29198 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29199 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */
29200 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29201 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29202 uses_vvvv
, vbi
, pfx
, delta
, "vpminsd", Iop_Min32Sx4
);
29203 goto decode_success
;
29205 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29206 /* VPMINSD = VEX.NDS.256.66.0F38.WIG 39 /r */
29207 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29208 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29209 uses_vvvv
, vbi
, pfx
, delta
, "vpminsd", Iop_Min32Sx8
);
29210 goto decode_success
;
29215 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29216 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */
29217 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29218 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29219 uses_vvvv
, vbi
, pfx
, delta
, "vpminuw", Iop_Min16Ux8
);
29220 goto decode_success
;
29222 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29223 /* VPMINUW = VEX.NDS.256.66.0F38.WIG 3A /r */
29224 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29225 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29226 uses_vvvv
, vbi
, pfx
, delta
, "vpminuw", Iop_Min16Ux16
);
29227 goto decode_success
;
29232 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29233 /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */
29234 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29235 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29236 uses_vvvv
, vbi
, pfx
, delta
, "vpminud", Iop_Min32Ux4
);
29237 goto decode_success
;
29239 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29240 /* VPMINUD = VEX.NDS.256.66.0F38.WIG 3B /r */
29241 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29242 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29243 uses_vvvv
, vbi
, pfx
, delta
, "vpminud", Iop_Min32Ux8
);
29244 goto decode_success
;
29249 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29250 /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */
29251 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29252 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29253 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsb", Iop_Max8Sx16
);
29254 goto decode_success
;
29256 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29257 /* VPMAXSB = VEX.NDS.256.66.0F38.WIG 3C /r */
29258 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29259 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29260 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsb", Iop_Max8Sx32
);
29261 goto decode_success
;
29266 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29267 /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */
29268 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29269 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29270 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsd", Iop_Max32Sx4
);
29271 goto decode_success
;
29273 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29274 /* VPMAXSD = VEX.NDS.256.66.0F38.WIG 3D /r */
29275 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29276 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29277 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxsd", Iop_Max32Sx8
);
29278 goto decode_success
;
29283 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29284 /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */
29285 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29286 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29287 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxuw", Iop_Max16Ux8
);
29288 goto decode_success
;
29290 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29291 /* VPMAXUW = VEX.NDS.256.66.0F38.WIG 3E /r */
29292 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29293 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29294 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxuw", Iop_Max16Ux16
);
29295 goto decode_success
;
29300 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29301 /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */
29302 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29303 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29304 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxud", Iop_Max32Ux4
);
29305 goto decode_success
;
29307 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29308 /* VPMAXUD = VEX.NDS.256.66.0F38.WIG 3F /r */
29309 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29310 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29311 uses_vvvv
, vbi
, pfx
, delta
, "vpmaxud", Iop_Max32Ux8
);
29312 goto decode_success
;
29317 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29318 /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */
29319 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29320 delta
= dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29321 uses_vvvv
, vbi
, pfx
, delta
, "vpmulld", Iop_Mul32x4
);
29322 goto decode_success
;
29324 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29325 /* VPMULLD = VEX.NDS.256.66.0F38.WIG 40 /r */
29326 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
29327 delta
= dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29328 uses_vvvv
, vbi
, pfx
, delta
, "vpmulld", Iop_Mul32x8
);
29329 goto decode_success
;
29334 /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */
29335 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29336 delta
= dis_PHMINPOSUW_128( vbi
, pfx
, delta
, True
/*isAvx*/ );
29337 goto decode_success
;
29342 /* VPSRLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 45 /r */
29343 /* VPSRLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 45 /r */
29344 if (have66noF2noF3(pfx
) && 0==getRexW(pfx
)/*W0*/) {
29345 delta
= dis_AVX_var_shiftV_byE( vbi
, pfx
, delta
, "vpsrlvd",
29346 Iop_Shr32
, 1==getVexL(pfx
) );
29348 goto decode_success
;
29350 /* VPSRLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 45 /r */
29351 /* VPSRLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 45 /r */
29352 if (have66noF2noF3(pfx
) && 1==getRexW(pfx
)/*W1*/) {
29353 delta
= dis_AVX_var_shiftV_byE( vbi
, pfx
, delta
, "vpsrlvq",
29354 Iop_Shr64
, 1==getVexL(pfx
) );
29356 goto decode_success
;
29361 /* VPSRAVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 46 /r */
29362 /* VPSRAVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 46 /r */
29363 if (have66noF2noF3(pfx
) && 0==getRexW(pfx
)/*W0*/) {
29364 delta
= dis_AVX_var_shiftV_byE( vbi
, pfx
, delta
, "vpsravd",
29365 Iop_Sar32
, 1==getVexL(pfx
) );
29367 goto decode_success
;
29372 /* VPSLLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 47 /r */
29373 /* VPSLLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 47 /r */
29374 if (have66noF2noF3(pfx
) && 0==getRexW(pfx
)/*W0*/) {
29375 delta
= dis_AVX_var_shiftV_byE( vbi
, pfx
, delta
, "vpsllvd",
29376 Iop_Shl32
, 1==getVexL(pfx
) );
29378 goto decode_success
;
29380 /* VPSLLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 47 /r */
29381 /* VPSLLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 47 /r */
29382 if (have66noF2noF3(pfx
) && 1==getRexW(pfx
)/*W1*/) {
29383 delta
= dis_AVX_var_shiftV_byE( vbi
, pfx
, delta
, "vpsllvq",
29384 Iop_Shl64
, 1==getVexL(pfx
) );
29386 goto decode_success
;
29391 /* VPBROADCASTD xmm2/m32, xmm1 = VEX.128.66.0F38.W0 58 /r */
29392 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29393 && 0==getRexW(pfx
)/*W0*/) {
29394 UChar modrm
= getUChar(delta
);
29395 UInt rG
= gregOfRexRM(pfx
, modrm
);
29396 IRTemp t32
= newTemp(Ity_I32
);
29397 if (epartIsReg(modrm
)) {
29398 UInt rE
= eregOfRexRM(pfx
, modrm
);
29400 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
29401 assign(t32
, getXMMRegLane32(rE
, 0));
29403 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29405 DIP("vpbroadcastd %s,%s\n", dis_buf
, nameXMMReg(rG
));
29406 assign(t32
, loadLE(Ity_I32
, mkexpr(addr
)));
29408 IRTemp t64
= newTemp(Ity_I64
);
29409 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
29410 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
29411 putYMMRegLoAndZU(rG
, res
);
29412 goto decode_success
;
29414 /* VPBROADCASTD xmm2/m32, ymm1 = VEX.256.66.0F38.W0 58 /r */
29415 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29416 && 0==getRexW(pfx
)/*W0*/) {
29417 UChar modrm
= getUChar(delta
);
29418 UInt rG
= gregOfRexRM(pfx
, modrm
);
29419 IRTemp t32
= newTemp(Ity_I32
);
29420 if (epartIsReg(modrm
)) {
29421 UInt rE
= eregOfRexRM(pfx
, modrm
);
29423 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
29424 assign(t32
, getXMMRegLane32(rE
, 0));
29426 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29428 DIP("vpbroadcastd %s,%s\n", dis_buf
, nameYMMReg(rG
));
29429 assign(t32
, loadLE(Ity_I32
, mkexpr(addr
)));
29431 IRTemp t64
= newTemp(Ity_I64
);
29432 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
29433 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
29434 mkexpr(t64
), mkexpr(t64
));
29435 putYMMReg(rG
, res
);
29436 goto decode_success
;
29441 /* VPBROADCASTQ xmm2/m64, xmm1 = VEX.128.66.0F38.W0 59 /r */
29442 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29443 && 0==getRexW(pfx
)/*W0*/) {
29444 UChar modrm
= getUChar(delta
);
29445 UInt rG
= gregOfRexRM(pfx
, modrm
);
29446 IRTemp t64
= newTemp(Ity_I64
);
29447 if (epartIsReg(modrm
)) {
29448 UInt rE
= eregOfRexRM(pfx
, modrm
);
29450 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
29451 assign(t64
, getXMMRegLane64(rE
, 0));
29453 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29455 DIP("vpbroadcastq %s,%s\n", dis_buf
, nameXMMReg(rG
));
29456 assign(t64
, loadLE(Ity_I64
, mkexpr(addr
)));
29458 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
29459 putYMMRegLoAndZU(rG
, res
);
29460 goto decode_success
;
29462 /* VPBROADCASTQ xmm2/m64, ymm1 = VEX.256.66.0F38.W0 59 /r */
29463 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29464 && 0==getRexW(pfx
)/*W0*/) {
29465 UChar modrm
= getUChar(delta
);
29466 UInt rG
= gregOfRexRM(pfx
, modrm
);
29467 IRTemp t64
= newTemp(Ity_I64
);
29468 if (epartIsReg(modrm
)) {
29469 UInt rE
= eregOfRexRM(pfx
, modrm
);
29471 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
29472 assign(t64
, getXMMRegLane64(rE
, 0));
29474 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29476 DIP("vpbroadcastq %s,%s\n", dis_buf
, nameYMMReg(rG
));
29477 assign(t64
, loadLE(Ity_I64
, mkexpr(addr
)));
29479 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
29480 mkexpr(t64
), mkexpr(t64
));
29481 putYMMReg(rG
, res
);
29482 goto decode_success
;
29487 /* VBROADCASTI128 m128, ymm1 = VEX.256.66.0F38.WIG 5A /r */
29488 if (have66noF2noF3(pfx
)
29489 && 1==getVexL(pfx
)/*256*/
29490 && !epartIsReg(getUChar(delta
))) {
29491 UChar modrm
= getUChar(delta
);
29492 UInt rG
= gregOfRexRM(pfx
, modrm
);
29493 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29495 DIP("vbroadcasti128 %s,%s\n", dis_buf
, nameYMMReg(rG
));
29496 IRTemp t128
= newTemp(Ity_V128
);
29497 assign(t128
, loadLE(Ity_V128
, mkexpr(addr
)));
29498 putYMMReg( rG
, binop(Iop_V128HLtoV256
, mkexpr(t128
), mkexpr(t128
)) );
29499 goto decode_success
;
29504 /* VPBROADCASTB xmm2/m8, xmm1 = VEX.128.66.0F38.W0 78 /r */
29505 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29506 && 0==getRexW(pfx
)/*W0*/) {
29507 UChar modrm
= getUChar(delta
);
29508 UInt rG
= gregOfRexRM(pfx
, modrm
);
29509 IRTemp t8
= newTemp(Ity_I8
);
29510 if (epartIsReg(modrm
)) {
29511 UInt rE
= eregOfRexRM(pfx
, modrm
);
29513 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
29514 assign(t8
, unop(Iop_32to8
, getXMMRegLane32(rE
, 0)));
29516 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29518 DIP("vpbroadcastb %s,%s\n", dis_buf
, nameXMMReg(rG
));
29519 assign(t8
, loadLE(Ity_I8
, mkexpr(addr
)));
29521 IRTemp t16
= newTemp(Ity_I16
);
29522 assign(t16
, binop(Iop_8HLto16
, mkexpr(t8
), mkexpr(t8
)));
29523 IRTemp t32
= newTemp(Ity_I32
);
29524 assign(t32
, binop(Iop_16HLto32
, mkexpr(t16
), mkexpr(t16
)));
29525 IRTemp t64
= newTemp(Ity_I64
);
29526 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
29527 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
29528 putYMMRegLoAndZU(rG
, res
);
29529 goto decode_success
;
29531 /* VPBROADCASTB xmm2/m8, ymm1 = VEX.256.66.0F38.W0 78 /r */
29532 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29533 && 0==getRexW(pfx
)/*W0*/) {
29534 UChar modrm
= getUChar(delta
);
29535 UInt rG
= gregOfRexRM(pfx
, modrm
);
29536 IRTemp t8
= newTemp(Ity_I8
);
29537 if (epartIsReg(modrm
)) {
29538 UInt rE
= eregOfRexRM(pfx
, modrm
);
29540 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
29541 assign(t8
, unop(Iop_32to8
, getXMMRegLane32(rE
, 0)));
29543 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29545 DIP("vpbroadcastb %s,%s\n", dis_buf
, nameYMMReg(rG
));
29546 assign(t8
, loadLE(Ity_I8
, mkexpr(addr
)));
29548 IRTemp t16
= newTemp(Ity_I16
);
29549 assign(t16
, binop(Iop_8HLto16
, mkexpr(t8
), mkexpr(t8
)));
29550 IRTemp t32
= newTemp(Ity_I32
);
29551 assign(t32
, binop(Iop_16HLto32
, mkexpr(t16
), mkexpr(t16
)));
29552 IRTemp t64
= newTemp(Ity_I64
);
29553 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
29554 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
29555 mkexpr(t64
), mkexpr(t64
));
29556 putYMMReg(rG
, res
);
29557 goto decode_success
;
29562 /* VPBROADCASTW xmm2/m16, xmm1 = VEX.128.66.0F38.W0 79 /r */
29563 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29564 && 0==getRexW(pfx
)/*W0*/) {
29565 UChar modrm
= getUChar(delta
);
29566 UInt rG
= gregOfRexRM(pfx
, modrm
);
29567 IRTemp t16
= newTemp(Ity_I16
);
29568 if (epartIsReg(modrm
)) {
29569 UInt rE
= eregOfRexRM(pfx
, modrm
);
29571 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE
), nameXMMReg(rG
));
29572 assign(t16
, unop(Iop_32to16
, getXMMRegLane32(rE
, 0)));
29574 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29576 DIP("vpbroadcastw %s,%s\n", dis_buf
, nameXMMReg(rG
));
29577 assign(t16
, loadLE(Ity_I16
, mkexpr(addr
)));
29579 IRTemp t32
= newTemp(Ity_I32
);
29580 assign(t32
, binop(Iop_16HLto32
, mkexpr(t16
), mkexpr(t16
)));
29581 IRTemp t64
= newTemp(Ity_I64
);
29582 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
29583 IRExpr
* res
= binop(Iop_64HLtoV128
, mkexpr(t64
), mkexpr(t64
));
29584 putYMMRegLoAndZU(rG
, res
);
29585 goto decode_success
;
29587 /* VPBROADCASTW xmm2/m16, ymm1 = VEX.256.66.0F38.W0 79 /r */
29588 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29589 && 0==getRexW(pfx
)/*W0*/) {
29590 UChar modrm
= getUChar(delta
);
29591 UInt rG
= gregOfRexRM(pfx
, modrm
);
29592 IRTemp t16
= newTemp(Ity_I16
);
29593 if (epartIsReg(modrm
)) {
29594 UInt rE
= eregOfRexRM(pfx
, modrm
);
29596 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE
), nameYMMReg(rG
));
29597 assign(t16
, unop(Iop_32to16
, getXMMRegLane32(rE
, 0)));
29599 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29601 DIP("vpbroadcastw %s,%s\n", dis_buf
, nameYMMReg(rG
));
29602 assign(t16
, loadLE(Ity_I16
, mkexpr(addr
)));
29604 IRTemp t32
= newTemp(Ity_I32
);
29605 assign(t32
, binop(Iop_16HLto32
, mkexpr(t16
), mkexpr(t16
)));
29606 IRTemp t64
= newTemp(Ity_I64
);
29607 assign(t64
, binop(Iop_32HLto64
, mkexpr(t32
), mkexpr(t32
)));
29608 IRExpr
* res
= IRExpr_Qop(Iop_64x4toV256
, mkexpr(t64
), mkexpr(t64
),
29609 mkexpr(t64
), mkexpr(t64
));
29610 putYMMReg(rG
, res
);
29611 goto decode_success
;
29616 /* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */
29617 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29618 && 0==getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29619 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovd",
29620 /*!isYMM*/False
, Ity_I32
, /*isLoad*/True
);
29621 goto decode_success
;
29623 /* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */
29624 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29625 && 0==getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29626 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovd",
29627 /*isYMM*/True
, Ity_I32
, /*isLoad*/True
);
29628 goto decode_success
;
29630 /* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */
29631 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29632 && 1==getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29633 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovq",
29634 /*!isYMM*/False
, Ity_I64
, /*isLoad*/True
);
29635 goto decode_success
;
29637 /* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */
29638 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29639 && 1==getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29640 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovq",
29641 /*isYMM*/True
, Ity_I64
, /*isLoad*/True
);
29642 goto decode_success
;
29647 /* VPMASKMOVD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 8E /r */
29648 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29649 && 0==getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29650 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovd",
29651 /*!isYMM*/False
, Ity_I32
, /*!isLoad*/False
);
29652 goto decode_success
;
29654 /* VPMASKMOVD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 8E /r */
29655 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29656 && 0==getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29657 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovd",
29658 /*isYMM*/True
, Ity_I32
, /*!isLoad*/False
);
29659 goto decode_success
;
29661 /* VPMASKMOVQ xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W1 8E /r */
29662 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29663 && 1==getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29664 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovq",
29665 /*!isYMM*/False
, Ity_I64
, /*!isLoad*/False
);
29666 goto decode_success
;
29668 /* VPMASKMOVQ ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W1 8E /r */
29669 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29670 && 1==getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29671 delta
= dis_VMASKMOV( uses_vvvv
, vbi
, pfx
, delta
, "vpmaskmovq",
29672 /*isYMM*/True
, Ity_I64
, /*!isLoad*/False
);
29673 goto decode_success
;
29678 /* VPGATHERDD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 90 /r */
29679 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29680 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29681 Long delta0
= delta
;
29682 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherdd",
29683 /*!isYMM*/False
, /*!isVM64x*/False
, Ity_I32
);
29684 if (delta
!= delta0
)
29685 goto decode_success
;
29687 /* VPGATHERDD ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 90 /r */
29688 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29689 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29690 Long delta0
= delta
;
29691 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherdd",
29692 /*isYMM*/True
, /*!isVM64x*/False
, Ity_I32
);
29693 if (delta
!= delta0
)
29694 goto decode_success
;
29696 /* VPGATHERDQ xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 90 /r */
29697 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29698 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29699 Long delta0
= delta
;
29700 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherdq",
29701 /*!isYMM*/False
, /*!isVM64x*/False
, Ity_I64
);
29702 if (delta
!= delta0
)
29703 goto decode_success
;
29705 /* VPGATHERDQ ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 90 /r */
29706 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29707 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29708 Long delta0
= delta
;
29709 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherdq",
29710 /*isYMM*/True
, /*!isVM64x*/False
, Ity_I64
);
29711 if (delta
!= delta0
)
29712 goto decode_success
;
29717 /* VPGATHERQD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 91 /r */
29718 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29719 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29720 Long delta0
= delta
;
29721 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherqd",
29722 /*!isYMM*/False
, /*isVM64x*/True
, Ity_I32
);
29723 if (delta
!= delta0
)
29724 goto decode_success
;
29726 /* VPGATHERQD xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 91 /r */
29727 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29728 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29729 Long delta0
= delta
;
29730 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherqd",
29731 /*isYMM*/True
, /*isVM64x*/True
, Ity_I32
);
29732 if (delta
!= delta0
)
29733 goto decode_success
;
29735 /* VPGATHERQQ xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 91 /r */
29736 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29737 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29738 Long delta0
= delta
;
29739 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherqq",
29740 /*!isYMM*/False
, /*isVM64x*/True
, Ity_I64
);
29741 if (delta
!= delta0
)
29742 goto decode_success
;
29744 /* VPGATHERQQ ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 91 /r */
29745 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29746 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29747 Long delta0
= delta
;
29748 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vpgatherqq",
29749 /*isYMM*/True
, /*isVM64x*/True
, Ity_I64
);
29750 if (delta
!= delta0
)
29751 goto decode_success
;
29756 /* VGATHERDPS xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 92 /r */
29757 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29758 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29759 Long delta0
= delta
;
29760 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherdps",
29761 /*!isYMM*/False
, /*!isVM64x*/False
, Ity_I32
);
29762 if (delta
!= delta0
)
29763 goto decode_success
;
29765 /* VGATHERDPS ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 92 /r */
29766 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29767 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29768 Long delta0
= delta
;
29769 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherdps",
29770 /*isYMM*/True
, /*!isVM64x*/False
, Ity_I32
);
29771 if (delta
!= delta0
)
29772 goto decode_success
;
29774 /* VGATHERDPD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 92 /r */
29775 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29776 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29777 Long delta0
= delta
;
29778 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherdpd",
29779 /*!isYMM*/False
, /*!isVM64x*/False
, Ity_I64
);
29780 if (delta
!= delta0
)
29781 goto decode_success
;
29783 /* VGATHERDPD ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 92 /r */
29784 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29785 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29786 Long delta0
= delta
;
29787 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherdpd",
29788 /*isYMM*/True
, /*!isVM64x*/False
, Ity_I64
);
29789 if (delta
!= delta0
)
29790 goto decode_success
;
29795 /* VGATHERQPS xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 93 /r */
29796 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29797 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29798 Long delta0
= delta
;
29799 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherqps",
29800 /*!isYMM*/False
, /*isVM64x*/True
, Ity_I32
);
29801 if (delta
!= delta0
)
29802 goto decode_success
;
29804 /* VGATHERQPS xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 93 /r */
29805 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29806 && 0 == getRexW(pfx
)/*W0*/ && !epartIsReg(getUChar(delta
))) {
29807 Long delta0
= delta
;
29808 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherqps",
29809 /*isYMM*/True
, /*isVM64x*/True
, Ity_I32
);
29810 if (delta
!= delta0
)
29811 goto decode_success
;
29813 /* VGATHERQPD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 93 /r */
29814 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/
29815 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29816 Long delta0
= delta
;
29817 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherqpd",
29818 /*!isYMM*/False
, /*isVM64x*/True
, Ity_I64
);
29819 if (delta
!= delta0
)
29820 goto decode_success
;
29822 /* VGATHERQPD ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 93 /r */
29823 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
29824 && 1 == getRexW(pfx
)/*W1*/ && !epartIsReg(getUChar(delta
))) {
29825 Long delta0
= delta
;
29826 delta
= dis_VGATHER( uses_vvvv
, vbi
, pfx
, delta
, "vgatherqpd",
29827 /*isYMM*/True
, /*isVM64x*/True
, Ity_I64
);
29828 if (delta
!= delta0
)
29829 goto decode_success
;
29833 case 0x96 ... 0x9F:
29834 case 0xA6 ... 0xAF:
29835 case 0xB6 ... 0xBF:
29836 /* VFMADDSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 96 /r */
29837 /* VFMADDSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 96 /r */
29838 /* VFMADDSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 96 /r */
29839 /* VFMADDSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 96 /r */
29840 /* VFMSUBADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 97 /r */
29841 /* VFMSUBADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 97 /r */
29842 /* VFMSUBADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 97 /r */
29843 /* VFMSUBADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 97 /r */
29844 /* VFMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 98 /r */
29845 /* VFMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 98 /r */
29846 /* VFMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 98 /r */
29847 /* VFMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 98 /r */
29848 /* VFMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 99 /r */
29849 /* VFMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 99 /r */
29850 /* VFMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9A /r */
29851 /* VFMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9A /r */
29852 /* VFMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9A /r */
29853 /* VFMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9A /r */
29854 /* VFMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9B /r */
29855 /* VFMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9B /r */
29856 /* VFNMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9C /r */
29857 /* VFNMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9C /r */
29858 /* VFNMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9C /r */
29859 /* VFNMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9C /r */
29860 /* VFNMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9D /r */
29861 /* VFNMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9D /r */
29862 /* VFNMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9E /r */
29863 /* VFNMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9E /r */
29864 /* VFNMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9E /r */
29865 /* VFNMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9E /r */
29866 /* VFNMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9F /r */
29867 /* VFNMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9F /r */
29868 /* VFMADDSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A6 /r */
29869 /* VFMADDSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A6 /r */
29870 /* VFMADDSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A6 /r */
29871 /* VFMADDSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A6 /r */
29872 /* VFMSUBADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A7 /r */
29873 /* VFMSUBADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A7 /r */
29874 /* VFMSUBADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A7 /r */
29875 /* VFMSUBADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A7 /r */
29876 /* VFMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A8 /r */
29877 /* VFMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A8 /r */
29878 /* VFMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A8 /r */
29879 /* VFMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A8 /r */
29880 /* VFMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 A9 /r */
29881 /* VFMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 A9 /r */
29882 /* VFMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AA /r */
29883 /* VFMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AA /r */
29884 /* VFMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AA /r */
29885 /* VFMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AA /r */
29886 /* VFMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AB /r */
29887 /* VFMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AB /r */
29888 /* VFNMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AC /r */
29889 /* VFNMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AC /r */
29890 /* VFNMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AC /r */
29891 /* VFNMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AC /r */
29892 /* VFNMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AD /r */
29893 /* VFNMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AD /r */
29894 /* VFNMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AE /r */
29895 /* VFNMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AE /r */
29896 /* VFNMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AE /r */
29897 /* VFNMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AE /r */
29898 /* VFNMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AF /r */
29899 /* VFNMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AF /r */
29900 /* VFMADDSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B6 /r */
29901 /* VFMADDSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B6 /r */
29902 /* VFMADDSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B6 /r */
29903 /* VFMADDSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B6 /r */
29904 /* VFMSUBADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B7 /r */
29905 /* VFMSUBADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B7 /r */
29906 /* VFMSUBADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B7 /r */
29907 /* VFMSUBADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B7 /r */
29908 /* VFMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B8 /r */
29909 /* VFMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B8 /r */
29910 /* VFMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B8 /r */
29911 /* VFMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B8 /r */
29912 /* VFMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 B9 /r */
29913 /* VFMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 B9 /r */
29914 /* VFMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BA /r */
29915 /* VFMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BA /r */
29916 /* VFMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BA /r */
29917 /* VFMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BA /r */
29918 /* VFMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BB /r */
29919 /* VFMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BB /r */
29920 /* VFNMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BC /r */
29921 /* VFNMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BC /r */
29922 /* VFNMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BC /r */
29923 /* VFNMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BC /r */
29924 /* VFNMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BD /r */
29925 /* VFNMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BD /r */
29926 /* VFNMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BE /r */
29927 /* VFNMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BE /r */
29928 /* VFNMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BE /r */
29929 /* VFNMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BE /r */
29930 /* VFNMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BF /r */
29931 /* VFNMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BF /r */
29932 if (have66noF2noF3(pfx
)) {
29933 delta
= dis_FMA( vbi
, pfx
, delta
, opc
);
29935 dres
->hint
= Dis_HintVerbose
;
29936 goto decode_success
;
29945 /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */
29946 /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */
29947 /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */
29948 /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */
29949 /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */
29950 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
29951 delta
= dis_AESx( vbi
, pfx
, delta
, True
/*!isAvx*/, opc
);
29952 if (opc
!= 0xDB) *uses_vvvv
= True
;
29953 goto decode_success
;
29958 /* ANDN r/m32, r32b, r32a = VEX.NDS.LZ.0F38.W0 F2 /r */
29959 /* ANDN r/m64, r64b, r64a = VEX.NDS.LZ.0F38.W1 F2 /r */
29960 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
29961 Int size
= getRexW(pfx
) ? 8 : 4;
29962 IRType ty
= szToITy(size
);
29963 IRTemp dst
= newTemp(ty
);
29964 IRTemp src1
= newTemp(ty
);
29965 IRTemp src2
= newTemp(ty
);
29966 UChar rm
= getUChar(delta
);
29968 assign( src1
, getIRegV(size
,pfx
) );
29969 if (epartIsReg(rm
)) {
29970 assign( src2
, getIRegE(size
,pfx
,rm
) );
29971 DIP("andn %s,%s,%s\n", nameIRegE(size
,pfx
,rm
),
29972 nameIRegV(size
,pfx
), nameIRegG(size
,pfx
,rm
));
29975 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
29976 assign( src2
, loadLE(ty
, mkexpr(addr
)) );
29977 DIP("andn %s,%s,%s\n", dis_buf
, nameIRegV(size
,pfx
),
29978 nameIRegG(size
,pfx
,rm
));
29982 assign( dst
, binop( mkSizedOp(ty
,Iop_And8
),
29983 unop( mkSizedOp(ty
,Iop_Not8
), mkexpr(src1
) ),
29985 putIRegG( size
, pfx
, rm
, mkexpr(dst
) );
29986 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
29987 ? AMD64G_CC_OP_ANDN64
29988 : AMD64G_CC_OP_ANDN32
)) );
29989 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
29990 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0)) );
29991 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
29993 goto decode_success
;
29998 /* BLSI r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /3 */
29999 /* BLSI r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /3 */
30000 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/
30001 && !haveREX(pfx
) && gregLO3ofRM(getUChar(delta
)) == 3) {
30002 Int size
= getRexW(pfx
) ? 8 : 4;
30003 IRType ty
= szToITy(size
);
30004 IRTemp src
= newTemp(ty
);
30005 IRTemp dst
= newTemp(ty
);
30006 UChar rm
= getUChar(delta
);
30008 if (epartIsReg(rm
)) {
30009 assign( src
, getIRegE(size
,pfx
,rm
) );
30010 DIP("blsi %s,%s\n", nameIRegE(size
,pfx
,rm
),
30011 nameIRegV(size
,pfx
));
30014 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30015 assign( src
, loadLE(ty
, mkexpr(addr
)) );
30016 DIP("blsi %s,%s\n", dis_buf
, nameIRegV(size
,pfx
));
30020 assign( dst
, binop(mkSizedOp(ty
,Iop_And8
),
30021 binop(mkSizedOp(ty
,Iop_Sub8
), mkU(ty
, 0),
30022 mkexpr(src
)), mkexpr(src
)) );
30023 putIRegV( size
, pfx
, mkexpr(dst
) );
30024 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
30025 ? AMD64G_CC_OP_BLSI64
30026 : AMD64G_CC_OP_BLSI32
)) );
30027 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
30028 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(src
))) );
30029 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
30031 goto decode_success
;
30033 /* BLSMSK r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /2 */
30034 /* BLSMSK r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /2 */
30035 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/
30036 && !haveREX(pfx
) && gregLO3ofRM(getUChar(delta
)) == 2) {
30037 Int size
= getRexW(pfx
) ? 8 : 4;
30038 IRType ty
= szToITy(size
);
30039 IRTemp src
= newTemp(ty
);
30040 IRTemp dst
= newTemp(ty
);
30041 UChar rm
= getUChar(delta
);
30043 if (epartIsReg(rm
)) {
30044 assign( src
, getIRegE(size
,pfx
,rm
) );
30045 DIP("blsmsk %s,%s\n", nameIRegE(size
,pfx
,rm
),
30046 nameIRegV(size
,pfx
));
30049 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30050 assign( src
, loadLE(ty
, mkexpr(addr
)) );
30051 DIP("blsmsk %s,%s\n", dis_buf
, nameIRegV(size
,pfx
));
30055 assign( dst
, binop(mkSizedOp(ty
,Iop_Xor8
),
30056 binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(src
),
30057 mkU(ty
, 1)), mkexpr(src
)) );
30058 putIRegV( size
, pfx
, mkexpr(dst
) );
30059 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
30060 ? AMD64G_CC_OP_BLSMSK64
30061 : AMD64G_CC_OP_BLSMSK32
)) );
30062 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
30063 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(src
))) );
30064 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
30066 goto decode_success
;
30068 /* BLSR r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /1 */
30069 /* BLSR r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /1 */
30070 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/
30071 && !haveREX(pfx
) && gregLO3ofRM(getUChar(delta
)) == 1) {
30072 Int size
= getRexW(pfx
) ? 8 : 4;
30073 IRType ty
= szToITy(size
);
30074 IRTemp src
= newTemp(ty
);
30075 IRTemp dst
= newTemp(ty
);
30076 UChar rm
= getUChar(delta
);
30078 if (epartIsReg(rm
)) {
30079 assign( src
, getIRegE(size
,pfx
,rm
) );
30080 DIP("blsr %s,%s\n", nameIRegE(size
,pfx
,rm
),
30081 nameIRegV(size
,pfx
));
30084 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30085 assign( src
, loadLE(ty
, mkexpr(addr
)) );
30086 DIP("blsr %s,%s\n", dis_buf
, nameIRegV(size
,pfx
));
30090 assign( dst
, binop(mkSizedOp(ty
,Iop_And8
),
30091 binop(mkSizedOp(ty
,Iop_Sub8
), mkexpr(src
),
30092 mkU(ty
, 1)), mkexpr(src
)) );
30093 putIRegV( size
, pfx
, mkexpr(dst
) );
30094 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
30095 ? AMD64G_CC_OP_BLSR64
30096 : AMD64G_CC_OP_BLSR32
)) );
30097 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
30098 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(src
))) );
30099 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
30101 goto decode_success
;
30106 /* BZHI r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F5 /r */
30107 /* BZHI r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F5 /r */
30108 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30109 Int size
= getRexW(pfx
) ? 8 : 4;
30110 IRType ty
= szToITy(size
);
30111 IRTemp dst
= newTemp(ty
);
30112 IRTemp src1
= newTemp(ty
);
30113 IRTemp src2
= newTemp(ty
);
30114 IRTemp start
= newTemp(Ity_I8
);
30115 IRTemp cond
= newTemp(Ity_I1
);
30116 UChar rm
= getUChar(delta
);
30118 assign( src2
, getIRegV(size
,pfx
) );
30119 if (epartIsReg(rm
)) {
30120 assign( src1
, getIRegE(size
,pfx
,rm
) );
30121 DIP("bzhi %s,%s,%s\n", nameIRegV(size
,pfx
),
30122 nameIRegE(size
,pfx
,rm
), nameIRegG(size
,pfx
,rm
));
30125 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30126 assign( src1
, loadLE(ty
, mkexpr(addr
)) );
30127 DIP("bzhi %s,%s,%s\n", nameIRegV(size
,pfx
), dis_buf
,
30128 nameIRegG(size
,pfx
,rm
));
30132 assign( start
, narrowTo( Ity_I8
, mkexpr(src2
) ) );
30133 assign( cond
, binop(Iop_CmpLT32U
,
30134 unop(Iop_8Uto32
, mkexpr(start
)),
30136 /* if (start < opsize) {
30140 dst = (src1 << (opsize-start)) u>> (opsize-start);
30148 binop(Iop_CmpEQ8
, mkexpr(start
), mkU8(0)),
30151 mkSizedOp(ty
,Iop_Shr8
),
30153 mkSizedOp(ty
,Iop_Shl8
),
30155 binop(Iop_Sub8
, mkU8(8*size
), mkexpr(start
))
30157 binop(Iop_Sub8
, mkU8(8*size
), mkexpr(start
))
30163 putIRegG( size
, pfx
, rm
, mkexpr(dst
) );
30164 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
30165 ? AMD64G_CC_OP_BLSR64
30166 : AMD64G_CC_OP_BLSR32
)) );
30167 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
30168 stmt( IRStmt_Put( OFFB_CC_DEP2
, widenUto64(mkexpr(cond
))) );
30169 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
30171 goto decode_success
;
30173 /* PDEP r/m32, r32b, r32a = VEX.NDS.LZ.F2.0F38.W0 F5 /r */
30174 /* PDEP r/m64, r64b, r64a = VEX.NDS.LZ.F2.0F38.W1 F5 /r */
30175 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30176 Int size
= getRexW(pfx
) ? 8 : 4;
30177 IRType ty
= szToITy(size
);
30178 IRTemp src
= newTemp(ty
);
30179 IRTemp mask
= newTemp(ty
);
30180 UChar rm
= getUChar(delta
);
30182 assign( src
, getIRegV(size
,pfx
) );
30183 if (epartIsReg(rm
)) {
30184 assign( mask
, getIRegE(size
,pfx
,rm
) );
30185 DIP("pdep %s,%s,%s\n", nameIRegE(size
,pfx
,rm
),
30186 nameIRegV(size
,pfx
), nameIRegG(size
,pfx
,rm
));
30189 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30190 assign( mask
, loadLE(ty
, mkexpr(addr
)) );
30191 DIP("pdep %s,%s,%s\n", dis_buf
, nameIRegV(size
,pfx
),
30192 nameIRegG(size
,pfx
,rm
));
30196 IRExpr
** args
= mkIRExprVec_2( widenUto64(mkexpr(src
)),
30197 widenUto64(mkexpr(mask
)) );
30198 putIRegG( size
, pfx
, rm
,
30199 narrowTo(ty
, mkIRExprCCall(Ity_I64
, 0/*regparms*/,
30200 "amd64g_calculate_pdep",
30201 &amd64g_calculate_pdep
, args
)) );
30203 /* Flags aren't modified. */
30204 goto decode_success
;
30206 /* PEXT r/m32, r32b, r32a = VEX.NDS.LZ.F3.0F38.W0 F5 /r */
30207 /* PEXT r/m64, r64b, r64a = VEX.NDS.LZ.F3.0F38.W1 F5 /r */
30208 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30209 Int size
= getRexW(pfx
) ? 8 : 4;
30210 IRType ty
= szToITy(size
);
30211 IRTemp src
= newTemp(ty
);
30212 IRTemp mask
= newTemp(ty
);
30213 UChar rm
= getUChar(delta
);
30215 assign( src
, getIRegV(size
,pfx
) );
30216 if (epartIsReg(rm
)) {
30217 assign( mask
, getIRegE(size
,pfx
,rm
) );
30218 DIP("pext %s,%s,%s\n", nameIRegE(size
,pfx
,rm
),
30219 nameIRegV(size
,pfx
), nameIRegG(size
,pfx
,rm
));
30222 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30223 assign( mask
, loadLE(ty
, mkexpr(addr
)) );
30224 DIP("pext %s,%s,%s\n", dis_buf
, nameIRegV(size
,pfx
),
30225 nameIRegG(size
,pfx
,rm
));
30229 /* First mask off bits not set in mask, they are ignored
30230 and it should be fine if they contain undefined values. */
30231 IRExpr
* masked
= binop(mkSizedOp(ty
,Iop_And8
),
30232 mkexpr(src
), mkexpr(mask
));
30233 IRExpr
** args
= mkIRExprVec_2( widenUto64(masked
),
30234 widenUto64(mkexpr(mask
)) );
30235 putIRegG( size
, pfx
, rm
,
30236 narrowTo(ty
, mkIRExprCCall(Ity_I64
, 0/*regparms*/,
30237 "amd64g_calculate_pext",
30238 &amd64g_calculate_pext
, args
)) );
30240 /* Flags aren't modified. */
30241 goto decode_success
;
30246 /* MULX r/m32, r32b, r32a = VEX.NDD.LZ.F2.0F38.W0 F6 /r */
30247 /* MULX r/m64, r64b, r64a = VEX.NDD.LZ.F2.0F38.W1 F6 /r */
30248 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30249 Int size
= getRexW(pfx
) ? 8 : 4;
30250 IRType ty
= szToITy(size
);
30251 IRTemp src1
= newTemp(ty
);
30252 IRTemp src2
= newTemp(ty
);
30253 IRTemp res
= newTemp(size
== 8 ? Ity_I128
: Ity_I64
);
30254 UChar rm
= getUChar(delta
);
30256 assign( src1
, getIRegRDX(size
) );
30257 if (epartIsReg(rm
)) {
30258 assign( src2
, getIRegE(size
,pfx
,rm
) );
30259 DIP("mulx %s,%s,%s\n", nameIRegE(size
,pfx
,rm
),
30260 nameIRegV(size
,pfx
), nameIRegG(size
,pfx
,rm
));
30263 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30264 assign( src2
, loadLE(ty
, mkexpr(addr
)) );
30265 DIP("mulx %s,%s,%s\n", dis_buf
, nameIRegV(size
,pfx
),
30266 nameIRegG(size
,pfx
,rm
));
30270 assign( res
, binop(size
== 8 ? Iop_MullU64
: Iop_MullU32
,
30271 mkexpr(src1
), mkexpr(src2
)) );
30272 putIRegV( size
, pfx
,
30273 unop(size
== 8 ? Iop_128to64
: Iop_64to32
, mkexpr(res
)) );
30274 putIRegG( size
, pfx
, rm
,
30275 unop(size
== 8 ? Iop_128HIto64
: Iop_64HIto32
,
30278 /* Flags aren't modified. */
30279 goto decode_success
;
30284 /* SARX r32b, r/m32, r32a = VEX.NDS.LZ.F3.0F38.W0 F7 /r */
30285 /* SARX r64b, r/m64, r64a = VEX.NDS.LZ.F3.0F38.W1 F7 /r */
30286 if (haveF3no66noF2(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30287 delta
= dis_SHIFTX( uses_vvvv
, vbi
, pfx
, delta
, "sarx", Iop_Sar8
);
30288 goto decode_success
;
30290 /* SHLX r32b, r/m32, r32a = VEX.NDS.LZ.66.0F38.W0 F7 /r */
30291 /* SHLX r64b, r/m64, r64a = VEX.NDS.LZ.66.0F38.W1 F7 /r */
30292 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30293 delta
= dis_SHIFTX( uses_vvvv
, vbi
, pfx
, delta
, "shlx", Iop_Shl8
);
30294 goto decode_success
;
30296 /* SHRX r32b, r/m32, r32a = VEX.NDS.LZ.F2.0F38.W0 F7 /r */
30297 /* SHRX r64b, r/m64, r64a = VEX.NDS.LZ.F2.0F38.W1 F7 /r */
30298 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30299 delta
= dis_SHIFTX( uses_vvvv
, vbi
, pfx
, delta
, "shrx", Iop_Shr8
);
30300 goto decode_success
;
30302 /* BEXTR r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F7 /r */
30303 /* BEXTR r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F7 /r */
30304 if (haveNo66noF2noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
30305 Int size
= getRexW(pfx
) ? 8 : 4;
30306 IRType ty
= szToITy(size
);
30307 IRTemp dst
= newTemp(ty
);
30308 IRTemp src1
= newTemp(ty
);
30309 IRTemp src2
= newTemp(ty
);
30310 IRTemp stle
= newTemp(Ity_I16
);
30311 IRTemp start
= newTemp(Ity_I8
);
30312 IRTemp len
= newTemp(Ity_I8
);
30313 UChar rm
= getUChar(delta
);
30315 assign( src2
, getIRegV(size
,pfx
) );
30316 if (epartIsReg(rm
)) {
30317 assign( src1
, getIRegE(size
,pfx
,rm
) );
30318 DIP("bextr %s,%s,%s\n", nameIRegV(size
,pfx
),
30319 nameIRegE(size
,pfx
,rm
), nameIRegG(size
,pfx
,rm
));
30322 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
30323 assign( src1
, loadLE(ty
, mkexpr(addr
)) );
30324 DIP("bextr %s,%s,%s\n", nameIRegV(size
,pfx
), dis_buf
,
30325 nameIRegG(size
,pfx
,rm
));
30329 assign( stle
, narrowTo( Ity_I16
, mkexpr(src2
) ) );
30330 assign( start
, unop( Iop_16to8
, mkexpr(stle
) ) );
30331 assign( len
, unop( Iop_16HIto8
, mkexpr(stle
) ) );
30332 /* if (start+len < opsize) {
30334 dst = (src1 << (opsize-start-len)) u>> (opsize-len);
30338 if (start < opsize)
30339 dst = src1 u>> start;
30345 binop(Iop_CmpLT32U
,
30347 unop(Iop_8Uto32
, mkexpr(start
)),
30348 unop(Iop_8Uto32
, mkexpr(len
))),
30351 binop(Iop_CmpEQ8
, mkexpr(len
), mkU8(0)),
30353 binop(mkSizedOp(ty
,Iop_Shr8
),
30354 binop(mkSizedOp(ty
,Iop_Shl8
), mkexpr(src1
),
30356 binop(Iop_Sub8
, mkU8(8*size
),
30359 binop(Iop_Sub8
, mkU8(8*size
),
30363 binop(Iop_CmpLT32U
,
30364 unop(Iop_8Uto32
, mkexpr(start
)),
30366 binop(mkSizedOp(ty
,Iop_Shr8
), mkexpr(src1
),
30372 putIRegG( size
, pfx
, rm
, mkexpr(dst
) );
30373 stmt( IRStmt_Put( OFFB_CC_OP
, mkU64(size
== 8
30374 ? AMD64G_CC_OP_ANDN64
30375 : AMD64G_CC_OP_ANDN32
)) );
30376 stmt( IRStmt_Put( OFFB_CC_DEP1
, widenUto64(mkexpr(dst
))) );
30377 stmt( IRStmt_Put( OFFB_CC_DEP2
, mkU64(0)) );
30378 stmt( IRStmt_Put( OFFB_CC_NDEP
, mkU64(0) ));
30380 goto decode_success
;
30400 static Long
decode_vregW(Int count
, Long delta
, UChar modrm
, Prefix pfx
,
30401 const VexAbiInfo
* vbi
, IRTemp
*v
, UInt
*dst
, Int swap
)
30403 v
[0] = newTemp(Ity_V128
);
30404 v
[1] = newTemp(Ity_V128
);
30405 v
[2] = newTemp(Ity_V128
);
30406 v
[3] = newTemp(Ity_V128
);
30407 IRTemp addr
= IRTemp_INVALID
;
30411 *dst
= gregOfRexRM(pfx
, modrm
);
30412 assign( v
[0], getXMMReg(*dst
) );
30414 if ( epartIsReg( modrm
) ) {
30415 UInt ereg
= eregOfRexRM(pfx
, modrm
);
30416 assign(swap
? v
[count
-1] : v
[count
-2], getXMMReg(ereg
) );
30417 DIS(dis_buf
, "%s", nameXMMReg(ereg
));
30419 Bool extra_byte
= (getUChar(delta
- 3) & 0xF) != 9;
30420 addr
= disAMode(&alen
, vbi
, pfx
, delta
, dis_buf
, extra_byte
);
30421 assign(swap
? v
[count
-1] : v
[count
-2], loadLE(Ity_V128
, mkexpr(addr
)));
30425 UInt vvvv
= getVexNvvvv(pfx
);
30428 DIP( "%s,%s", nameXMMReg(*dst
), dis_buf
);
30431 assign( swap
? v
[1] : v
[2], getXMMReg(vvvv
) );
30432 DIP( "%s,%s,%s", nameXMMReg(*dst
), nameXMMReg(vvvv
), dis_buf
);
30436 assign( v
[1], getXMMReg(vvvv
) );
30437 UInt src2
= getUChar(delta
+ 1) >> 4;
30438 assign( swap
? v
[2] : v
[3], getXMMReg(src2
) );
30439 DIP( "%s,%s,%s,%s", nameXMMReg(*dst
), nameXMMReg(vvvv
),
30440 nameXMMReg(src2
), dis_buf
);
30447 static Long
dis_FMA4 (Prefix pfx
, Long delta
, UChar opc
,
30448 Bool
* uses_vvvv
, const VexAbiInfo
* vbi
)
30453 UChar modrm
= getUChar(delta
);
30455 Bool zero_64F
= False
;
30456 Bool zero_96F
= False
;
30457 UInt is_F32
= ((opc
& 0x01) == 0x00) ? 1 : 0;
30458 Bool neg
= (opc
& 0xF0) == 0x70;
30459 Bool alt
= (opc
& 0xF0) == 0x50;
30460 Bool sub
= alt
? (opc
& 0x0E) != 0x0E : (opc
& 0x0C) == 0x0C;
30463 switch(opc
& 0xF) {
30464 case 0x0A: zero_96F
= (opc
>> 4) != 0x05; break;
30465 case 0x0B: zero_64F
= (opc
>> 4) != 0x05; break;
30466 case 0x0E: zero_96F
= (opc
>> 4) != 0x05; break;
30467 case 0x0F: zero_64F
= (opc
>> 4) != 0x05; break;
30470 DIP("vfm%s", neg
? "n" : "");
30471 if(alt
) DIP("%s", sub
? "add" : "sub");
30472 DIP("%s", sub
? "sub" : "add");
30473 DIP("%c ", (zero_64F
|| zero_96F
) ? 's' : 'p');
30474 DIP("%c ", is_F32
? 's' : 'd');
30475 delta
= decode_vregW(4, delta
, modrm
, pfx
, vbi
, operand
, &dst
, getRexW(pfx
));
30479 void (*putXMM
[2])(UInt
,Int
,IRExpr
*) = {&putXMMRegLane64F
, &putXMMRegLane32F
};
30481 IROp size_op
[] = {Iop_V128to64
, Iop_V128HIto64
, Iop_64to32
, Iop_64HIto32
};
30482 IROp neg_op
[] = {Iop_NegF64
, Iop_NegF32
};
30484 for(i
= 0; i
< is_F32
* 2 + 2; i
++) {
30485 for(j
= 0; j
< 3; j
++) {
30487 src
[j
] = unop(Iop_ReinterpI32asF32
,
30488 unop(size_op
[i
%2+2],
30490 mkexpr(operand
[j
+ 1])
30494 src
[j
] = unop(Iop_ReinterpI64asF64
,
30496 mkexpr(operand
[j
+ 1])
30500 putXMM
[is_F32
](dst
, i
, IRExpr_Qop(is_F32
? Iop_MAddF32
: Iop_MAddF64
,
30501 get_FAKE_roundingmode(),
30502 neg
? unop(neg_op
[is_F32
], src
[0])
30505 sub
? unop(neg_op
[is_F32
], src
[2])
30513 /* Zero out top bits of ymm/xmm register. */
30514 putYMMRegLane128( dst
, 1, mkV128(0) );
30516 if(zero_64F
|| zero_96F
) {
30517 putXMMRegLane64( dst
, 1, IRExpr_Const(IRConst_U64(0)));
30521 putXMMRegLane32( dst
, 1, IRExpr_Const(IRConst_U32(0)));
30527 /*------------------------------------------------------------*/
30529 /*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX ---*/
30531 /*------------------------------------------------------------*/
30533 static IRTemp
math_VPERMILPS_128 ( IRTemp sV
, UInt imm8
)
30535 vassert(imm8
< 256);
30536 IRTemp s3
, s2
, s1
, s0
;
30537 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
30538 breakupV128to32s( sV
, &s3
, &s2
, &s1
, &s0
);
30539 # define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \
30540 : ((_nn)==2) ? s2 : s3)
30541 IRTemp res
= newTemp(Ity_V128
);
30542 assign(res
, mkV128from32s( SEL((imm8
>> 6) & 3),
30543 SEL((imm8
>> 4) & 3),
30544 SEL((imm8
>> 2) & 3),
30545 SEL((imm8
>> 0) & 3) ));
30550 /* Handles 128 and 256 bit versions of VCVTPS2PH. */
30551 static Long
dis_VCVTPS2PH ( const VexAbiInfo
* vbi
, Prefix pfx
,
30552 Long delta
, Bool is256bit
)
30554 /* This is a width-halving store or reg-reg move, that does conversion on the
30555 transferred data. */
30556 UChar modrm
= getUChar(delta
);
30557 UInt rG
= gregOfRexRM(pfx
, modrm
);
30558 IRTemp rm
= newTemp(Ity_I32
);
30559 IROp op
= is256bit
? Iop_F32toF16x8
: Iop_F32toF16x4
;
30560 IRExpr
* srcG
= (is256bit
? getYMMReg
: getXMMReg
)(rG
);
30562 /* (imm & 3) contains an Intel-encoded rounding mode. Because that encoding
30563 is the same as the encoding for IRRoundingMode, we can use that value
30564 directly in the IR as a rounding mode. */
30566 if (epartIsReg(modrm
)) {
30567 UInt rE
= eregOfRexRM(pfx
, modrm
);
30569 UInt imm
= getUChar(delta
);
30570 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
30571 IRExpr
* res
= binop(op
, mkexpr(rm
), srcG
);
30573 res
= unop(Iop_64UtoV128
, res
);
30574 putYMMRegLoAndZU(rE
, res
);
30575 DIP("vcvtps2ph $%u,%s,%s\n",
30576 imm
, (is256bit
? nameYMMReg
: nameXMMReg
)(rG
), nameXMMReg(rE
));
30580 IRTemp addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30582 UInt imm
= getUChar(delta
);
30583 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
30584 IRExpr
* res
= binop(op
, mkexpr(rm
), srcG
);
30585 storeLE(mkexpr(addr
), res
);
30586 DIP("vcvtps2ph $%u,%s,%s\n",
30587 imm
, (is256bit
? nameYMMReg
: nameXMMReg
)(rG
), dis_buf
);
30590 /* doesn't use vvvv */
30594 __attribute__((noinline
))
30596 Long
dis_ESC_0F3A__VEX (
30597 /*MB_OUT*/DisResult
* dres
,
30598 /*OUT*/ Bool
* uses_vvvv
,
30599 const VexArchInfo
* archinfo
,
30600 const VexAbiInfo
* vbi
,
30601 Prefix pfx
, Int sz
, Long deltaIN
30604 IRTemp addr
= IRTemp_INVALID
;
30607 Long delta
= deltaIN
;
30608 UChar opc
= getUChar(delta
);
30610 *uses_vvvv
= False
;
30616 /* VPERMQ imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 00 /r ib */
30617 /* VPERMPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 01 /r ib */
30618 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/
30619 && 1==getRexW(pfx
)/*W1*/) {
30620 UChar modrm
= getUChar(delta
);
30622 UInt rG
= gregOfRexRM(pfx
, modrm
);
30623 IRTemp sV
= newTemp(Ity_V256
);
30624 const HChar
*name
= opc
== 0 ? "vpermq" : "vpermpd";
30625 if (epartIsReg(modrm
)) {
30626 UInt rE
= eregOfRexRM(pfx
, modrm
);
30628 imm8
= getUChar(delta
);
30629 DIP("%s $%u,%s,%s\n",
30630 name
, imm8
, nameYMMReg(rE
), nameYMMReg(rG
));
30631 assign(sV
, getYMMReg(rE
));
30633 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30635 imm8
= getUChar(delta
);
30636 DIP("%s $%u,%s,%s\n",
30637 name
, imm8
, dis_buf
, nameYMMReg(rG
));
30638 assign(sV
, loadLE(Ity_V256
, mkexpr(addr
)));
30642 s
[3] = s
[2] = s
[1] = s
[0] = IRTemp_INVALID
;
30643 breakupV256to64s(sV
, &s
[3], &s
[2], &s
[1], &s
[0]);
30644 IRTemp dV
= newTemp(Ity_V256
);
30645 assign(dV
, IRExpr_Qop(Iop_64x4toV256
,
30646 mkexpr(s
[(imm8
>> 6) & 3]),
30647 mkexpr(s
[(imm8
>> 4) & 3]),
30648 mkexpr(s
[(imm8
>> 2) & 3]),
30649 mkexpr(s
[(imm8
>> 0) & 3])));
30650 putYMMReg(rG
, mkexpr(dV
));
30651 goto decode_success
;
30656 /* VPBLENDD imm8, xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 02 /r ib */
30657 if (have66noF2noF3(pfx
)
30658 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
30659 UChar modrm
= getUChar(delta
);
30661 UInt rG
= gregOfRexRM(pfx
, modrm
);
30662 UInt rV
= getVexNvvvv(pfx
);
30663 IRTemp sV
= newTemp(Ity_V128
);
30664 IRTemp dV
= newTemp(Ity_V128
);
30667 assign(sV
, getXMMReg(rV
));
30668 if (epartIsReg(modrm
)) {
30669 UInt rE
= eregOfRexRM(pfx
, modrm
);
30671 imm8
= getUChar(delta
);
30672 DIP("vpblendd $%u,%s,%s,%s\n",
30673 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
30674 assign(dV
, getXMMReg(rE
));
30676 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30678 imm8
= getUChar(delta
);
30679 DIP("vpblendd $%u,%s,%s,%s\n",
30680 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
30681 assign(dV
, loadLE(Ity_V128
, mkexpr(addr
)));
30684 for (i
= 0; i
< 4; i
++) {
30685 s
[i
] = IRTemp_INVALID
;
30686 d
[i
] = IRTemp_INVALID
;
30688 breakupV128to32s( sV
, &s
[3], &s
[2], &s
[1], &s
[0] );
30689 breakupV128to32s( dV
, &d
[3], &d
[2], &d
[1], &d
[0] );
30690 for (i
= 0; i
< 4; i
++)
30691 putYMMRegLane32(rG
, i
, mkexpr((imm8
& (1<<i
)) ? d
[i
] : s
[i
]));
30692 putYMMRegLane128(rG
, 1, mkV128(0));
30694 goto decode_success
;
30696 /* VPBLENDD imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 02 /r ib */
30697 if (have66noF2noF3(pfx
)
30698 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
30699 UChar modrm
= getUChar(delta
);
30701 UInt rG
= gregOfRexRM(pfx
, modrm
);
30702 UInt rV
= getVexNvvvv(pfx
);
30703 IRTemp sV
= newTemp(Ity_V256
);
30704 IRTemp dV
= newTemp(Ity_V256
);
30707 assign(sV
, getYMMReg(rV
));
30708 if (epartIsReg(modrm
)) {
30709 UInt rE
= eregOfRexRM(pfx
, modrm
);
30711 imm8
= getUChar(delta
);
30712 DIP("vpblendd $%u,%s,%s,%s\n",
30713 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
30714 assign(dV
, getYMMReg(rE
));
30716 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30718 imm8
= getUChar(delta
);
30719 DIP("vpblendd $%u,%s,%s,%s\n",
30720 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
30721 assign(dV
, loadLE(Ity_V256
, mkexpr(addr
)));
30724 for (i
= 0; i
< 8; i
++) {
30725 s
[i
] = IRTemp_INVALID
;
30726 d
[i
] = IRTemp_INVALID
;
30728 breakupV256to32s( sV
, &s
[7], &s
[6], &s
[5], &s
[4],
30729 &s
[3], &s
[2], &s
[1], &s
[0] );
30730 breakupV256to32s( dV
, &d
[7], &d
[6], &d
[5], &d
[4],
30731 &d
[3], &d
[2], &d
[1], &d
[0] );
30732 for (i
= 0; i
< 8; i
++)
30733 putYMMRegLane32(rG
, i
, mkexpr((imm8
& (1<<i
)) ? d
[i
] : s
[i
]));
30735 goto decode_success
;
30740 /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */
30741 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
30742 UChar modrm
= getUChar(delta
);
30744 UInt rG
= gregOfRexRM(pfx
, modrm
);
30745 IRTemp sV
= newTemp(Ity_V256
);
30746 if (epartIsReg(modrm
)) {
30747 UInt rE
= eregOfRexRM(pfx
, modrm
);
30749 imm8
= getUChar(delta
);
30750 DIP("vpermilps $%u,%s,%s\n",
30751 imm8
, nameYMMReg(rE
), nameYMMReg(rG
));
30752 assign(sV
, getYMMReg(rE
));
30754 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30756 imm8
= getUChar(delta
);
30757 DIP("vpermilps $%u,%s,%s\n",
30758 imm8
, dis_buf
, nameYMMReg(rG
));
30759 assign(sV
, loadLE(Ity_V256
, mkexpr(addr
)));
30762 IRTemp sVhi
= IRTemp_INVALID
, sVlo
= IRTemp_INVALID
;
30763 breakupV256toV128s( sV
, &sVhi
, &sVlo
);
30764 IRTemp dVhi
= math_VPERMILPS_128( sVhi
, imm8
);
30765 IRTemp dVlo
= math_VPERMILPS_128( sVlo
, imm8
);
30766 IRExpr
* res
= binop(Iop_V128HLtoV256
, mkexpr(dVhi
), mkexpr(dVlo
));
30767 putYMMReg(rG
, res
);
30768 goto decode_success
;
30770 /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */
30771 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
30772 UChar modrm
= getUChar(delta
);
30774 UInt rG
= gregOfRexRM(pfx
, modrm
);
30775 IRTemp sV
= newTemp(Ity_V128
);
30776 if (epartIsReg(modrm
)) {
30777 UInt rE
= eregOfRexRM(pfx
, modrm
);
30779 imm8
= getUChar(delta
);
30780 DIP("vpermilps $%u,%s,%s\n",
30781 imm8
, nameXMMReg(rE
), nameXMMReg(rG
));
30782 assign(sV
, getXMMReg(rE
));
30784 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30786 imm8
= getUChar(delta
);
30787 DIP("vpermilps $%u,%s,%s\n",
30788 imm8
, dis_buf
, nameXMMReg(rG
));
30789 assign(sV
, loadLE(Ity_V128
, mkexpr(addr
)));
30792 putYMMRegLoAndZU(rG
, mkexpr ( math_VPERMILPS_128 ( sV
, imm8
) ) );
30793 goto decode_success
;
30798 /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */
30799 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
30800 UChar modrm
= getUChar(delta
);
30802 UInt rG
= gregOfRexRM(pfx
, modrm
);
30803 IRTemp sV
= newTemp(Ity_V128
);
30804 if (epartIsReg(modrm
)) {
30805 UInt rE
= eregOfRexRM(pfx
, modrm
);
30807 imm8
= getUChar(delta
);
30808 DIP("vpermilpd $%u,%s,%s\n",
30809 imm8
, nameXMMReg(rE
), nameXMMReg(rG
));
30810 assign(sV
, getXMMReg(rE
));
30812 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30814 imm8
= getUChar(delta
);
30815 DIP("vpermilpd $%u,%s,%s\n",
30816 imm8
, dis_buf
, nameXMMReg(rG
));
30817 assign(sV
, loadLE(Ity_V128
, mkexpr(addr
)));
30820 IRTemp s1
= newTemp(Ity_I64
);
30821 IRTemp s0
= newTemp(Ity_I64
);
30822 assign(s1
, unop(Iop_V128HIto64
, mkexpr(sV
)));
30823 assign(s0
, unop(Iop_V128to64
, mkexpr(sV
)));
30824 IRTemp dV
= newTemp(Ity_V128
);
30825 assign(dV
, binop(Iop_64HLtoV128
,
30826 mkexpr((imm8
& (1<<1)) ? s1
: s0
),
30827 mkexpr((imm8
& (1<<0)) ? s1
: s0
)));
30828 putYMMRegLoAndZU(rG
, mkexpr(dV
));
30829 goto decode_success
;
30831 /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */
30832 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
30833 UChar modrm
= getUChar(delta
);
30835 UInt rG
= gregOfRexRM(pfx
, modrm
);
30836 IRTemp sV
= newTemp(Ity_V256
);
30837 if (epartIsReg(modrm
)) {
30838 UInt rE
= eregOfRexRM(pfx
, modrm
);
30840 imm8
= getUChar(delta
);
30841 DIP("vpermilpd $%u,%s,%s\n",
30842 imm8
, nameYMMReg(rE
), nameYMMReg(rG
));
30843 assign(sV
, getYMMReg(rE
));
30845 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30847 imm8
= getUChar(delta
);
30848 DIP("vpermilpd $%u,%s,%s\n",
30849 imm8
, dis_buf
, nameYMMReg(rG
));
30850 assign(sV
, loadLE(Ity_V256
, mkexpr(addr
)));
30853 IRTemp s3
, s2
, s1
, s0
;
30854 s3
= s2
= s1
= s0
= IRTemp_INVALID
;
30855 breakupV256to64s(sV
, &s3
, &s2
, &s1
, &s0
);
30856 IRTemp dV
= newTemp(Ity_V256
);
30857 assign(dV
, IRExpr_Qop(Iop_64x4toV256
,
30858 mkexpr((imm8
& (1<<3)) ? s3
: s2
),
30859 mkexpr((imm8
& (1<<2)) ? s3
: s2
),
30860 mkexpr((imm8
& (1<<1)) ? s1
: s0
),
30861 mkexpr((imm8
& (1<<0)) ? s1
: s0
)));
30862 putYMMReg(rG
, mkexpr(dV
));
30863 goto decode_success
;
30868 /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 06 /r ib */
30869 if (have66noF2noF3(pfx
)
30870 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
30871 UChar modrm
= getUChar(delta
);
30873 UInt rG
= gregOfRexRM(pfx
, modrm
);
30874 UInt rV
= getVexNvvvv(pfx
);
30875 IRTemp s00
= newTemp(Ity_V128
);
30876 IRTemp s01
= newTemp(Ity_V128
);
30877 IRTemp s10
= newTemp(Ity_V128
);
30878 IRTemp s11
= newTemp(Ity_V128
);
30879 assign(s00
, getYMMRegLane128(rV
, 0));
30880 assign(s01
, getYMMRegLane128(rV
, 1));
30881 if (epartIsReg(modrm
)) {
30882 UInt rE
= eregOfRexRM(pfx
, modrm
);
30884 imm8
= getUChar(delta
);
30885 DIP("vperm2f128 $%u,%s,%s,%s\n",
30886 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
30887 assign(s10
, getYMMRegLane128(rE
, 0));
30888 assign(s11
, getYMMRegLane128(rE
, 1));
30890 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30892 imm8
= getUChar(delta
);
30893 DIP("vperm2f128 $%u,%s,%s,%s\n",
30894 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
30895 assign(s10
, loadLE(Ity_V128
, binop(Iop_Add64
,
30896 mkexpr(addr
), mkU64(0))));
30897 assign(s11
, loadLE(Ity_V128
, binop(Iop_Add64
,
30898 mkexpr(addr
), mkU64(16))));
30901 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
30902 : ((_nn)==2) ? s10 : s11)
30903 putYMMRegLane128(rG
, 0, mkexpr(SEL((imm8
>> 0) & 3)));
30904 putYMMRegLane128(rG
, 1, mkexpr(SEL((imm8
>> 4) & 3)));
30906 if (imm8
& (1<<3)) putYMMRegLane128(rG
, 0, mkV128(0));
30907 if (imm8
& (1<<7)) putYMMRegLane128(rG
, 1, mkV128(0));
30909 goto decode_success
;
30914 /* VROUNDPS imm8, xmm2/m128, xmm1 */
30915 /* VROUNDPS = VEX.NDS.128.66.0F3A.WIG 08 ib */
30916 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
30917 UChar modrm
= getUChar(delta
);
30918 UInt rG
= gregOfRexRM(pfx
, modrm
);
30919 IRTemp src
= newTemp(Ity_V128
);
30920 IRTemp s0
= IRTemp_INVALID
;
30921 IRTemp s1
= IRTemp_INVALID
;
30922 IRTemp s2
= IRTemp_INVALID
;
30923 IRTemp s3
= IRTemp_INVALID
;
30924 IRTemp rm
= newTemp(Ity_I32
);
30927 modrm
= getUChar(delta
);
30929 if (epartIsReg(modrm
)) {
30930 UInt rE
= eregOfRexRM(pfx
, modrm
);
30931 assign( src
, getXMMReg( rE
) );
30932 imm
= getUChar(delta
+1);
30933 if (imm
& ~15) break;
30935 DIP( "vroundps $%d,%s,%s\n", imm
, nameXMMReg(rE
), nameXMMReg(rG
) );
30937 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30938 assign( src
, loadLE(Ity_V128
, mkexpr(addr
) ) );
30939 imm
= getUChar(delta
+alen
);
30940 if (imm
& ~15) break;
30942 DIP( "vroundps $%d,%s,%s\n", imm
, dis_buf
, nameXMMReg(rG
) );
30945 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30946 that encoding is the same as the encoding for IRRoundingMode,
30947 we can use that value directly in the IR as a rounding
30949 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
30951 breakupV128to32s( src
, &s3
, &s2
, &s1
, &s0
);
30952 putYMMRegLane128( rG
, 1, mkV128(0) );
30953 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
30954 unop(Iop_ReinterpI32asF32, mkexpr(s)))
30955 putYMMRegLane32F( rG
, 3, CVT(s3
) );
30956 putYMMRegLane32F( rG
, 2, CVT(s2
) );
30957 putYMMRegLane32F( rG
, 1, CVT(s1
) );
30958 putYMMRegLane32F( rG
, 0, CVT(s0
) );
30960 goto decode_success
;
30962 /* VROUNDPS imm8, ymm2/m256, ymm1 */
30963 /* VROUNDPS = VEX.NDS.256.66.0F3A.WIG 08 ib */
30964 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
30965 UChar modrm
= getUChar(delta
);
30966 UInt rG
= gregOfRexRM(pfx
, modrm
);
30967 IRTemp src
= newTemp(Ity_V256
);
30968 IRTemp s0
= IRTemp_INVALID
;
30969 IRTemp s1
= IRTemp_INVALID
;
30970 IRTemp s2
= IRTemp_INVALID
;
30971 IRTemp s3
= IRTemp_INVALID
;
30972 IRTemp s4
= IRTemp_INVALID
;
30973 IRTemp s5
= IRTemp_INVALID
;
30974 IRTemp s6
= IRTemp_INVALID
;
30975 IRTemp s7
= IRTemp_INVALID
;
30976 IRTemp rm
= newTemp(Ity_I32
);
30979 modrm
= getUChar(delta
);
30981 if (epartIsReg(modrm
)) {
30982 UInt rE
= eregOfRexRM(pfx
, modrm
);
30983 assign( src
, getYMMReg( rE
) );
30984 imm
= getUChar(delta
+1);
30985 if (imm
& ~15) break;
30987 DIP( "vroundps $%d,%s,%s\n", imm
, nameYMMReg(rE
), nameYMMReg(rG
) );
30989 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
30990 assign( src
, loadLE(Ity_V256
, mkexpr(addr
) ) );
30991 imm
= getUChar(delta
+alen
);
30992 if (imm
& ~15) break;
30994 DIP( "vroundps $%d,%s,%s\n", imm
, dis_buf
, nameYMMReg(rG
) );
30997 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30998 that encoding is the same as the encoding for IRRoundingMode,
30999 we can use that value directly in the IR as a rounding
31001 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
31003 breakupV256to32s( src
, &s7
, &s6
, &s5
, &s4
, &s3
, &s2
, &s1
, &s0
);
31004 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
31005 unop(Iop_ReinterpI32asF32, mkexpr(s)))
31006 putYMMRegLane32F( rG
, 7, CVT(s7
) );
31007 putYMMRegLane32F( rG
, 6, CVT(s6
) );
31008 putYMMRegLane32F( rG
, 5, CVT(s5
) );
31009 putYMMRegLane32F( rG
, 4, CVT(s4
) );
31010 putYMMRegLane32F( rG
, 3, CVT(s3
) );
31011 putYMMRegLane32F( rG
, 2, CVT(s2
) );
31012 putYMMRegLane32F( rG
, 1, CVT(s1
) );
31013 putYMMRegLane32F( rG
, 0, CVT(s0
) );
31015 goto decode_success
;
31020 /* VROUNDPD imm8, xmm2/m128, xmm1 */
31021 /* VROUNDPD = VEX.NDS.128.66.0F3A.WIG 09 ib */
31022 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31023 UChar modrm
= getUChar(delta
);
31024 UInt rG
= gregOfRexRM(pfx
, modrm
);
31025 IRTemp src
= newTemp(Ity_V128
);
31026 IRTemp s0
= IRTemp_INVALID
;
31027 IRTemp s1
= IRTemp_INVALID
;
31028 IRTemp rm
= newTemp(Ity_I32
);
31031 modrm
= getUChar(delta
);
31033 if (epartIsReg(modrm
)) {
31034 UInt rE
= eregOfRexRM(pfx
, modrm
);
31035 assign( src
, getXMMReg( rE
) );
31036 imm
= getUChar(delta
+1);
31037 if (imm
& ~15) break;
31039 DIP( "vroundpd $%d,%s,%s\n", imm
, nameXMMReg(rE
), nameXMMReg(rG
) );
31041 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31042 assign( src
, loadLE(Ity_V128
, mkexpr(addr
) ) );
31043 imm
= getUChar(delta
+alen
);
31044 if (imm
& ~15) break;
31046 DIP( "vroundpd $%d,%s,%s\n", imm
, dis_buf
, nameXMMReg(rG
) );
31049 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31050 that encoding is the same as the encoding for IRRoundingMode,
31051 we can use that value directly in the IR as a rounding
31053 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
31055 breakupV128to64s( src
, &s1
, &s0
);
31056 putYMMRegLane128( rG
, 1, mkV128(0) );
31057 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
31058 unop(Iop_ReinterpI64asF64, mkexpr(s)))
31059 putYMMRegLane64F( rG
, 1, CVT(s1
) );
31060 putYMMRegLane64F( rG
, 0, CVT(s0
) );
31062 goto decode_success
;
31064 /* VROUNDPD imm8, ymm2/m256, ymm1 */
31065 /* VROUNDPD = VEX.NDS.256.66.0F3A.WIG 09 ib */
31066 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31067 UChar modrm
= getUChar(delta
);
31068 UInt rG
= gregOfRexRM(pfx
, modrm
);
31069 IRTemp src
= newTemp(Ity_V256
);
31070 IRTemp s0
= IRTemp_INVALID
;
31071 IRTemp s1
= IRTemp_INVALID
;
31072 IRTemp s2
= IRTemp_INVALID
;
31073 IRTemp s3
= IRTemp_INVALID
;
31074 IRTemp rm
= newTemp(Ity_I32
);
31077 modrm
= getUChar(delta
);
31079 if (epartIsReg(modrm
)) {
31080 UInt rE
= eregOfRexRM(pfx
, modrm
);
31081 assign( src
, getYMMReg( rE
) );
31082 imm
= getUChar(delta
+1);
31083 if (imm
& ~15) break;
31085 DIP( "vroundpd $%d,%s,%s\n", imm
, nameYMMReg(rE
), nameYMMReg(rG
) );
31087 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31088 assign( src
, loadLE(Ity_V256
, mkexpr(addr
) ) );
31089 imm
= getUChar(delta
+alen
);
31090 if (imm
& ~15) break;
31092 DIP( "vroundpd $%d,%s,%s\n", imm
, dis_buf
, nameYMMReg(rG
) );
31095 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31096 that encoding is the same as the encoding for IRRoundingMode,
31097 we can use that value directly in the IR as a rounding
31099 assign(rm
, (imm
& 4) ? get_sse_roundingmode() : mkU32(imm
& 3));
31101 breakupV256to64s( src
, &s3
, &s2
, &s1
, &s0
);
31102 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
31103 unop(Iop_ReinterpI64asF64, mkexpr(s)))
31104 putYMMRegLane64F( rG
, 3, CVT(s3
) );
31105 putYMMRegLane64F( rG
, 2, CVT(s2
) );
31106 putYMMRegLane64F( rG
, 1, CVT(s1
) );
31107 putYMMRegLane64F( rG
, 0, CVT(s0
) );
31109 goto decode_success
;
31115 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */
31116 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */
31117 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */
31118 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */
31119 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31120 UChar modrm
= getUChar(delta
);
31121 UInt rG
= gregOfRexRM(pfx
, modrm
);
31122 UInt rV
= getVexNvvvv(pfx
);
31123 Bool isD
= opc
== 0x0B;
31124 IRTemp src
= newTemp(isD
? Ity_F64
: Ity_F32
);
31125 IRTemp res
= newTemp(isD
? Ity_F64
: Ity_F32
);
31128 if (epartIsReg(modrm
)) {
31129 UInt rE
= eregOfRexRM(pfx
, modrm
);
31131 isD
? getXMMRegLane64F(rE
, 0) : getXMMRegLane32F(rE
, 0) );
31132 imm
= getUChar(delta
+1);
31133 if (imm
& ~15) break;
31135 DIP( "vrounds%c $%d,%s,%s,%s\n",
31137 imm
, nameXMMReg( rE
), nameXMMReg( rV
), nameXMMReg( rG
) );
31139 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31140 assign( src
, loadLE( isD
? Ity_F64
: Ity_F32
, mkexpr(addr
) ));
31141 imm
= getUChar(delta
+alen
);
31142 if (imm
& ~15) break;
31144 DIP( "vrounds%c $%d,%s,%s,%s\n",
31146 imm
, dis_buf
, nameXMMReg( rV
), nameXMMReg( rG
) );
31149 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31150 that encoding is the same as the encoding for IRRoundingMode,
31151 we can use that value directly in the IR as a rounding
31153 assign(res
, binop(isD
? Iop_RoundF64toInt
: Iop_RoundF32toInt
,
31154 (imm
& 4) ? get_sse_roundingmode()
31159 putXMMRegLane64F( rG
, 0, mkexpr(res
) );
31161 putXMMRegLane32F( rG
, 0, mkexpr(res
) );
31162 putXMMRegLane32F( rG
, 1, getXMMRegLane32F( rV
, 1 ) );
31164 putXMMRegLane64F( rG
, 1, getXMMRegLane64F( rV
, 1 ) );
31165 putYMMRegLane128( rG
, 1, mkV128(0) );
31167 goto decode_success
;
31172 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */
31173 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */
31174 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31175 UChar modrm
= getUChar(delta
);
31177 UInt rG
= gregOfRexRM(pfx
, modrm
);
31178 UInt rV
= getVexNvvvv(pfx
);
31179 IRTemp sV
= newTemp(Ity_V256
);
31180 IRTemp sE
= newTemp(Ity_V256
);
31181 assign ( sV
, getYMMReg(rV
) );
31182 if (epartIsReg(modrm
)) {
31183 UInt rE
= eregOfRexRM(pfx
, modrm
);
31185 imm8
= getUChar(delta
);
31186 DIP("vblendps $%u,%s,%s,%s\n",
31187 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31188 assign(sE
, getYMMReg(rE
));
31190 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31192 imm8
= getUChar(delta
);
31193 DIP("vblendps $%u,%s,%s,%s\n",
31194 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31195 assign(sE
, loadLE(Ity_V256
, mkexpr(addr
)));
31199 mkexpr( math_BLENDPS_256( sE
, sV
, imm8
) ) );
31201 goto decode_success
;
31203 /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */
31204 /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */
31205 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31206 UChar modrm
= getUChar(delta
);
31208 UInt rG
= gregOfRexRM(pfx
, modrm
);
31209 UInt rV
= getVexNvvvv(pfx
);
31210 IRTemp sV
= newTemp(Ity_V128
);
31211 IRTemp sE
= newTemp(Ity_V128
);
31212 assign ( sV
, getXMMReg(rV
) );
31213 if (epartIsReg(modrm
)) {
31214 UInt rE
= eregOfRexRM(pfx
, modrm
);
31216 imm8
= getUChar(delta
);
31217 DIP("vblendps $%u,%s,%s,%s\n",
31218 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
31219 assign(sE
, getXMMReg(rE
));
31221 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31223 imm8
= getUChar(delta
);
31224 DIP("vblendps $%u,%s,%s,%s\n",
31225 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
31226 assign(sE
, loadLE(Ity_V128
, mkexpr(addr
)));
31229 putYMMRegLoAndZU( rG
,
31230 mkexpr( math_BLENDPS_128( sE
, sV
, imm8
) ) );
31232 goto decode_success
;
31237 /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */
31238 /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */
31239 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31240 UChar modrm
= getUChar(delta
);
31242 UInt rG
= gregOfRexRM(pfx
, modrm
);
31243 UInt rV
= getVexNvvvv(pfx
);
31244 IRTemp sV
= newTemp(Ity_V256
);
31245 IRTemp sE
= newTemp(Ity_V256
);
31246 assign ( sV
, getYMMReg(rV
) );
31247 if (epartIsReg(modrm
)) {
31248 UInt rE
= eregOfRexRM(pfx
, modrm
);
31250 imm8
= getUChar(delta
);
31251 DIP("vblendpd $%u,%s,%s,%s\n",
31252 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31253 assign(sE
, getYMMReg(rE
));
31255 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31257 imm8
= getUChar(delta
);
31258 DIP("vblendpd $%u,%s,%s,%s\n",
31259 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31260 assign(sE
, loadLE(Ity_V256
, mkexpr(addr
)));
31264 mkexpr( math_BLENDPD_256( sE
, sV
, imm8
) ) );
31266 goto decode_success
;
31268 /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */
31269 /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */
31270 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31271 UChar modrm
= getUChar(delta
);
31273 UInt rG
= gregOfRexRM(pfx
, modrm
);
31274 UInt rV
= getVexNvvvv(pfx
);
31275 IRTemp sV
= newTemp(Ity_V128
);
31276 IRTemp sE
= newTemp(Ity_V128
);
31277 assign ( sV
, getXMMReg(rV
) );
31278 if (epartIsReg(modrm
)) {
31279 UInt rE
= eregOfRexRM(pfx
, modrm
);
31281 imm8
= getUChar(delta
);
31282 DIP("vblendpd $%u,%s,%s,%s\n",
31283 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
31284 assign(sE
, getXMMReg(rE
));
31286 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31288 imm8
= getUChar(delta
);
31289 DIP("vblendpd $%u,%s,%s,%s\n",
31290 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
31291 assign(sE
, loadLE(Ity_V128
, mkexpr(addr
)));
31294 putYMMRegLoAndZU( rG
,
31295 mkexpr( math_BLENDPD_128( sE
, sV
, imm8
) ) );
31297 goto decode_success
;
31302 /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */
31303 /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */
31304 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31305 UChar modrm
= getUChar(delta
);
31307 UInt rG
= gregOfRexRM(pfx
, modrm
);
31308 UInt rV
= getVexNvvvv(pfx
);
31309 IRTemp sV
= newTemp(Ity_V128
);
31310 IRTemp sE
= newTemp(Ity_V128
);
31311 assign ( sV
, getXMMReg(rV
) );
31312 if (epartIsReg(modrm
)) {
31313 UInt rE
= eregOfRexRM(pfx
, modrm
);
31315 imm8
= getUChar(delta
);
31316 DIP("vpblendw $%u,%s,%s,%s\n",
31317 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
));
31318 assign(sE
, getXMMReg(rE
));
31320 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31322 imm8
= getUChar(delta
);
31323 DIP("vpblendw $%u,%s,%s,%s\n",
31324 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
));
31325 assign(sE
, loadLE(Ity_V128
, mkexpr(addr
)));
31328 putYMMRegLoAndZU( rG
,
31329 mkexpr( math_PBLENDW_128( sE
, sV
, imm8
) ) );
31331 goto decode_success
;
31333 /* VPBLENDW imm8, ymm3/m256, ymm2, ymm1 */
31334 /* VPBLENDW = VEX.NDS.256.66.0F3A.WIG 0E /r ib */
31335 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31336 UChar modrm
= getUChar(delta
);
31338 UInt rG
= gregOfRexRM(pfx
, modrm
);
31339 UInt rV
= getVexNvvvv(pfx
);
31340 IRTemp sV
= newTemp(Ity_V256
);
31341 IRTemp sE
= newTemp(Ity_V256
);
31342 IRTemp sVhi
, sVlo
, sEhi
, sElo
;
31343 sVhi
= sVlo
= sEhi
= sElo
= IRTemp_INVALID
;
31344 assign ( sV
, getYMMReg(rV
) );
31345 if (epartIsReg(modrm
)) {
31346 UInt rE
= eregOfRexRM(pfx
, modrm
);
31348 imm8
= getUChar(delta
);
31349 DIP("vpblendw $%u,%s,%s,%s\n",
31350 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31351 assign(sE
, getYMMReg(rE
));
31353 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31355 imm8
= getUChar(delta
);
31356 DIP("vpblendw $%u,%s,%s,%s\n",
31357 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31358 assign(sE
, loadLE(Ity_V256
, mkexpr(addr
)));
31361 breakupV256toV128s( sV
, &sVhi
, &sVlo
);
31362 breakupV256toV128s( sE
, &sEhi
, &sElo
);
31363 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
31364 mkexpr( math_PBLENDW_128( sEhi
, sVhi
, imm8
) ),
31365 mkexpr( math_PBLENDW_128( sElo
, sVlo
, imm8
) ) ) );
31367 goto decode_success
;
31372 /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */
31373 /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */
31374 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31375 UChar modrm
= getUChar(delta
);
31376 UInt rG
= gregOfRexRM(pfx
, modrm
);
31377 UInt rV
= getVexNvvvv(pfx
);
31378 IRTemp sV
= newTemp(Ity_V128
);
31379 IRTemp dV
= newTemp(Ity_V128
);
31382 assign( dV
, getXMMReg(rV
) );
31384 if ( epartIsReg( modrm
) ) {
31385 UInt rE
= eregOfRexRM(pfx
, modrm
);
31386 assign( sV
, getXMMReg(rE
) );
31387 imm8
= getUChar(delta
+1);
31389 DIP("vpalignr $%u,%s,%s,%s\n", imm8
, nameXMMReg(rE
),
31390 nameXMMReg(rV
), nameXMMReg(rG
));
31392 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31393 assign( sV
, loadLE(Ity_V128
, mkexpr(addr
)) );
31394 imm8
= getUChar(delta
+alen
);
31396 DIP("vpalignr $%u,%s,%s,%s\n", imm8
, dis_buf
,
31397 nameXMMReg(rV
), nameXMMReg(rG
));
31400 IRTemp res
= math_PALIGNR_XMM( sV
, dV
, imm8
);
31401 putYMMRegLoAndZU( rG
, mkexpr(res
) );
31403 goto decode_success
;
31405 /* VPALIGNR imm8, ymm3/m256, ymm2, ymm1 */
31406 /* VPALIGNR = VEX.NDS.256.66.0F3A.WIG 0F /r ib */
31407 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31408 UChar modrm
= getUChar(delta
);
31409 UInt rG
= gregOfRexRM(pfx
, modrm
);
31410 UInt rV
= getVexNvvvv(pfx
);
31411 IRTemp sV
= newTemp(Ity_V256
);
31412 IRTemp dV
= newTemp(Ity_V256
);
31413 IRTemp sHi
, sLo
, dHi
, dLo
;
31414 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
31417 assign( dV
, getYMMReg(rV
) );
31419 if ( epartIsReg( modrm
) ) {
31420 UInt rE
= eregOfRexRM(pfx
, modrm
);
31421 assign( sV
, getYMMReg(rE
) );
31422 imm8
= getUChar(delta
+1);
31424 DIP("vpalignr $%u,%s,%s,%s\n", imm8
, nameYMMReg(rE
),
31425 nameYMMReg(rV
), nameYMMReg(rG
));
31427 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31428 assign( sV
, loadLE(Ity_V256
, mkexpr(addr
)) );
31429 imm8
= getUChar(delta
+alen
);
31431 DIP("vpalignr $%u,%s,%s,%s\n", imm8
, dis_buf
,
31432 nameYMMReg(rV
), nameYMMReg(rG
));
31435 breakupV256toV128s( dV
, &dHi
, &dLo
);
31436 breakupV256toV128s( sV
, &sHi
, &sLo
);
31437 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
31438 mkexpr( math_PALIGNR_XMM( sHi
, dHi
, imm8
) ),
31439 mkexpr( math_PALIGNR_XMM( sLo
, dLo
, imm8
) ) )
31442 goto decode_success
;
31447 /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */
31448 if (have66noF2noF3(pfx
)
31449 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
31450 delta
= dis_PEXTRB_128_GtoE( vbi
, pfx
, delta
, False
/*!isAvx*/ );
31451 goto decode_success
;
31456 /* VPEXTRW imm8, reg/m16, xmm2 */
31457 /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */
31458 if (have66noF2noF3(pfx
)
31459 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
31460 delta
= dis_PEXTRW( vbi
, pfx
, delta
, True
/*isAvx*/ );
31461 goto decode_success
;
31466 /* VPEXTRD imm8, r32/m32, xmm2 */
31467 /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */
31468 if (have66noF2noF3(pfx
)
31469 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
31470 delta
= dis_PEXTRD( vbi
, pfx
, delta
, True
/*isAvx*/ );
31471 goto decode_success
;
31473 /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */
31474 if (have66noF2noF3(pfx
)
31475 && 0==getVexL(pfx
)/*128*/ && 1==getRexW(pfx
)/*W1*/) {
31476 delta
= dis_PEXTRQ( vbi
, pfx
, delta
, True
/*isAvx*/ );
31477 goto decode_success
;
31482 /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */
31483 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31484 delta
= dis_EXTRACTPS( vbi
, pfx
, delta
, True
/*isAvx*/ );
31485 goto decode_success
;
31490 /* VINSERTF128 r/m, rV, rD
31491 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31492 /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */
31493 if (have66noF2noF3(pfx
)
31494 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
31495 UChar modrm
= getUChar(delta
);
31497 UInt rG
= gregOfRexRM(pfx
, modrm
);
31498 UInt rV
= getVexNvvvv(pfx
);
31499 IRTemp t128
= newTemp(Ity_V128
);
31500 if (epartIsReg(modrm
)) {
31501 UInt rE
= eregOfRexRM(pfx
, modrm
);
31503 assign(t128
, getXMMReg(rE
));
31504 ib
= getUChar(delta
);
31505 DIP("vinsertf128 $%u,%s,%s,%s\n",
31506 ib
, nameXMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31508 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31509 assign(t128
, loadLE(Ity_V128
, mkexpr(addr
)));
31511 ib
= getUChar(delta
);
31512 DIP("vinsertf128 $%u,%s,%s,%s\n",
31513 ib
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31516 putYMMRegLane128(rG
, 0, getYMMRegLane128(rV
, 0));
31517 putYMMRegLane128(rG
, 1, getYMMRegLane128(rV
, 1));
31518 putYMMRegLane128(rG
, ib
& 1, mkexpr(t128
));
31520 goto decode_success
;
31525 /* VEXTRACTF128 $lane_no, rS, r/m
31526 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31527 /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */
31528 if (have66noF2noF3(pfx
)
31529 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
31530 UChar modrm
= getUChar(delta
);
31532 UInt rS
= gregOfRexRM(pfx
, modrm
);
31533 IRTemp t128
= newTemp(Ity_V128
);
31534 if (epartIsReg(modrm
)) {
31535 UInt rD
= eregOfRexRM(pfx
, modrm
);
31537 ib
= getUChar(delta
);
31538 assign(t128
, getYMMRegLane128(rS
, ib
& 1));
31539 putYMMRegLoAndZU(rD
, mkexpr(t128
));
31540 DIP("vextractf128 $%u,%s,%s\n",
31541 ib
, nameXMMReg(rS
), nameYMMReg(rD
));
31543 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31545 ib
= getUChar(delta
);
31546 assign(t128
, getYMMRegLane128(rS
, ib
& 1));
31547 storeLE(mkexpr(addr
), mkexpr(t128
));
31548 DIP("vextractf128 $%u,%s,%s\n",
31549 ib
, nameYMMReg(rS
), dis_buf
);
31552 /* doesn't use vvvv */
31553 goto decode_success
;
31558 /* VCVTPS2PH imm8, xmm2, xmm1/m64 = VEX.128.66.0F3A.W0 1D /r ib */
31559 if (have66noF2noF3(pfx
)
31560 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/
31561 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_F16C
)) {
31562 delta
= dis_VCVTPS2PH( vbi
, pfx
, delta
, /*is256bit=*/False
);
31563 goto decode_success
;
31565 /* VCVTPS2PH imm8, ymm2, ymm1/m128 = VEX.256.66.0F3A.W0 1D /r ib */
31566 if (have66noF2noF3(pfx
)
31567 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/
31568 && (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_F16C
)) {
31569 delta
= dis_VCVTPS2PH( vbi
, pfx
, delta
, /*is256bit=*/True
);
31570 goto decode_success
;
31575 /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */
31576 if (have66noF2noF3(pfx
)
31577 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
31578 UChar modrm
= getUChar(delta
);
31579 UInt rG
= gregOfRexRM(pfx
, modrm
);
31580 UInt rV
= getVexNvvvv(pfx
);
31582 IRTemp src_u8
= newTemp(Ity_I8
);
31584 if ( epartIsReg( modrm
) ) {
31585 UInt rE
= eregOfRexRM(pfx
,modrm
);
31586 imm8
= (Int
)(getUChar(delta
+1) & 15);
31587 assign( src_u8
, unop(Iop_32to8
, getIReg32( rE
)) );
31589 DIP( "vpinsrb $%d,%s,%s,%s\n",
31590 imm8
, nameIReg32(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31592 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31593 imm8
= (Int
)(getUChar(delta
+alen
) & 15);
31594 assign( src_u8
, loadLE( Ity_I8
, mkexpr(addr
) ) );
31596 DIP( "vpinsrb $%d,%s,%s,%s\n",
31597 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31600 IRTemp src_vec
= newTemp(Ity_V128
);
31601 assign(src_vec
, getXMMReg( rV
));
31602 IRTemp res_vec
= math_PINSRB_128( src_vec
, src_u8
, imm8
);
31603 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
31605 goto decode_success
;
31610 /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1
31611 = VEX.NDS.128.66.0F3A.WIG 21 /r ib */
31612 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31613 UChar modrm
= getUChar(delta
);
31614 UInt rG
= gregOfRexRM(pfx
, modrm
);
31615 UInt rV
= getVexNvvvv(pfx
);
31617 IRTemp d2ins
= newTemp(Ity_I32
); /* comes from the E part */
31618 const IRTemp inval
= IRTemp_INVALID
;
31620 if ( epartIsReg( modrm
) ) {
31621 UInt rE
= eregOfRexRM(pfx
, modrm
);
31622 IRTemp vE
= newTemp(Ity_V128
);
31623 assign( vE
, getXMMReg(rE
) );
31624 IRTemp dsE
[4] = { inval
, inval
, inval
, inval
};
31625 breakupV128to32s( vE
, &dsE
[3], &dsE
[2], &dsE
[1], &dsE
[0] );
31626 imm8
= getUChar(delta
+1);
31627 d2ins
= dsE
[(imm8
>> 6) & 3]; /* "imm8_count_s" */
31629 DIP( "insertps $%u, %s,%s\n",
31630 imm8
, nameXMMReg(rE
), nameXMMReg(rG
) );
31632 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31633 assign( d2ins
, loadLE( Ity_I32
, mkexpr(addr
) ) );
31634 imm8
= getUChar(delta
+alen
);
31636 DIP( "insertps $%u, %s,%s\n",
31637 imm8
, dis_buf
, nameXMMReg(rG
) );
31640 IRTemp vV
= newTemp(Ity_V128
);
31641 assign( vV
, getXMMReg(rV
) );
31643 putYMMRegLoAndZU( rG
, mkexpr(math_INSERTPS( vV
, d2ins
, imm8
)) );
31645 goto decode_success
;
31650 /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */
31651 if (have66noF2noF3(pfx
)
31652 && 0==getVexL(pfx
)/*128*/ && 0==getRexW(pfx
)/*W0*/) {
31653 UChar modrm
= getUChar(delta
);
31654 UInt rG
= gregOfRexRM(pfx
, modrm
);
31655 UInt rV
= getVexNvvvv(pfx
);
31657 IRTemp src_u32
= newTemp(Ity_I32
);
31659 if ( epartIsReg( modrm
) ) {
31660 UInt rE
= eregOfRexRM(pfx
,modrm
);
31661 imm8_10
= (Int
)(getUChar(delta
+1) & 3);
31662 assign( src_u32
, getIReg32( rE
) );
31664 DIP( "vpinsrd $%d,%s,%s,%s\n",
31665 imm8_10
, nameIReg32(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31667 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31668 imm8_10
= (Int
)(getUChar(delta
+alen
) & 3);
31669 assign( src_u32
, loadLE( Ity_I32
, mkexpr(addr
) ) );
31671 DIP( "vpinsrd $%d,%s,%s,%s\n",
31672 imm8_10
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31675 IRTemp src_vec
= newTemp(Ity_V128
);
31676 assign(src_vec
, getXMMReg( rV
));
31677 IRTemp res_vec
= math_PINSRD_128( src_vec
, src_u32
, imm8_10
);
31678 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
31680 goto decode_success
;
31682 /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */
31683 if (have66noF2noF3(pfx
)
31684 && 0==getVexL(pfx
)/*128*/ && 1==getRexW(pfx
)/*W1*/) {
31685 UChar modrm
= getUChar(delta
);
31686 UInt rG
= gregOfRexRM(pfx
, modrm
);
31687 UInt rV
= getVexNvvvv(pfx
);
31689 IRTemp src_u64
= newTemp(Ity_I64
);
31691 if ( epartIsReg( modrm
) ) {
31692 UInt rE
= eregOfRexRM(pfx
,modrm
);
31693 imm8_0
= (Int
)(getUChar(delta
+1) & 1);
31694 assign( src_u64
, getIReg64( rE
) );
31696 DIP( "vpinsrq $%d,%s,%s,%s\n",
31697 imm8_0
, nameIReg64(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31699 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31700 imm8_0
= (Int
)(getUChar(delta
+alen
) & 1);
31701 assign( src_u64
, loadLE( Ity_I64
, mkexpr(addr
) ) );
31703 DIP( "vpinsrq $%d,%s,%s,%s\n",
31704 imm8_0
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31707 IRTemp src_vec
= newTemp(Ity_V128
);
31708 assign(src_vec
, getXMMReg( rV
));
31709 IRTemp res_vec
= math_PINSRQ_128( src_vec
, src_u64
, imm8_0
);
31710 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
31712 goto decode_success
;
31717 /* VINSERTI128 r/m, rV, rD
31718 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31719 /* VINSERTI128 = VEX.NDS.256.66.0F3A.W0 38 /r ib */
31720 if (have66noF2noF3(pfx
)
31721 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
31722 UChar modrm
= getUChar(delta
);
31724 UInt rG
= gregOfRexRM(pfx
, modrm
);
31725 UInt rV
= getVexNvvvv(pfx
);
31726 IRTemp t128
= newTemp(Ity_V128
);
31727 if (epartIsReg(modrm
)) {
31728 UInt rE
= eregOfRexRM(pfx
, modrm
);
31730 assign(t128
, getXMMReg(rE
));
31731 ib
= getUChar(delta
);
31732 DIP("vinserti128 $%u,%s,%s,%s\n",
31733 ib
, nameXMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
31735 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31736 assign(t128
, loadLE(Ity_V128
, mkexpr(addr
)));
31738 ib
= getUChar(delta
);
31739 DIP("vinserti128 $%u,%s,%s,%s\n",
31740 ib
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
31743 putYMMRegLane128(rG
, 0, getYMMRegLane128(rV
, 0));
31744 putYMMRegLane128(rG
, 1, getYMMRegLane128(rV
, 1));
31745 putYMMRegLane128(rG
, ib
& 1, mkexpr(t128
));
31747 goto decode_success
;
31752 /* VEXTRACTI128 $lane_no, rS, r/m
31753 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31754 /* VEXTRACTI128 = VEX.256.66.0F3A.W0 39 /r ib */
31755 if (have66noF2noF3(pfx
)
31756 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
31757 UChar modrm
= getUChar(delta
);
31759 UInt rS
= gregOfRexRM(pfx
, modrm
);
31760 IRTemp t128
= newTemp(Ity_V128
);
31761 if (epartIsReg(modrm
)) {
31762 UInt rD
= eregOfRexRM(pfx
, modrm
);
31764 ib
= getUChar(delta
);
31765 assign(t128
, getYMMRegLane128(rS
, ib
& 1));
31766 putYMMRegLoAndZU(rD
, mkexpr(t128
));
31767 DIP("vextracti128 $%u,%s,%s\n",
31768 ib
, nameXMMReg(rS
), nameYMMReg(rD
));
31770 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31772 ib
= getUChar(delta
);
31773 assign(t128
, getYMMRegLane128(rS
, ib
& 1));
31774 storeLE(mkexpr(addr
), mkexpr(t128
));
31775 DIP("vextracti128 $%u,%s,%s\n",
31776 ib
, nameYMMReg(rS
), dis_buf
);
31779 /* doesn't use vvvv */
31780 goto decode_success
;
31785 /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */
31786 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31787 UChar modrm
= getUChar(delta
);
31788 UInt rG
= gregOfRexRM(pfx
, modrm
);
31789 UInt rV
= getVexNvvvv(pfx
);
31790 IRTemp dst_vec
= newTemp(Ity_V128
);
31792 if (epartIsReg( modrm
)) {
31793 UInt rE
= eregOfRexRM(pfx
,modrm
);
31794 imm8
= (Int
)getUChar(delta
+1);
31795 assign( dst_vec
, getXMMReg( rE
) );
31797 DIP( "vdpps $%d,%s,%s,%s\n",
31798 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31800 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31801 imm8
= (Int
)getUChar(delta
+alen
);
31802 assign( dst_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
31804 DIP( "vdpps $%d,%s,%s,%s\n",
31805 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31808 IRTemp src_vec
= newTemp(Ity_V128
);
31809 assign(src_vec
, getXMMReg( rV
));
31810 IRTemp res_vec
= math_DPPS_128( src_vec
, dst_vec
, imm8
);
31811 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
31813 goto decode_success
;
31815 /* VDPPS imm8, ymm3/m128,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */
31816 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31817 UChar modrm
= getUChar(delta
);
31818 UInt rG
= gregOfRexRM(pfx
, modrm
);
31819 UInt rV
= getVexNvvvv(pfx
);
31820 IRTemp dst_vec
= newTemp(Ity_V256
);
31822 if (epartIsReg( modrm
)) {
31823 UInt rE
= eregOfRexRM(pfx
,modrm
);
31824 imm8
= (Int
)getUChar(delta
+1);
31825 assign( dst_vec
, getYMMReg( rE
) );
31827 DIP( "vdpps $%d,%s,%s,%s\n",
31828 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
) );
31830 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31831 imm8
= (Int
)getUChar(delta
+alen
);
31832 assign( dst_vec
, loadLE( Ity_V256
, mkexpr(addr
) ) );
31834 DIP( "vdpps $%d,%s,%s,%s\n",
31835 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
) );
31838 IRTemp src_vec
= newTemp(Ity_V256
);
31839 assign(src_vec
, getYMMReg( rV
));
31840 IRTemp s0
, s1
, d0
, d1
;
31841 s0
= s1
= d0
= d1
= IRTemp_INVALID
;
31842 breakupV256toV128s( dst_vec
, &d1
, &d0
);
31843 breakupV256toV128s( src_vec
, &s1
, &s0
);
31844 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
31845 mkexpr( math_DPPS_128(s1
, d1
, imm8
) ),
31846 mkexpr( math_DPPS_128(s0
, d0
, imm8
) ) ) );
31848 goto decode_success
;
31853 /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */
31854 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31855 UChar modrm
= getUChar(delta
);
31856 UInt rG
= gregOfRexRM(pfx
, modrm
);
31857 UInt rV
= getVexNvvvv(pfx
);
31858 IRTemp dst_vec
= newTemp(Ity_V128
);
31860 if (epartIsReg( modrm
)) {
31861 UInt rE
= eregOfRexRM(pfx
,modrm
);
31862 imm8
= (Int
)getUChar(delta
+1);
31863 assign( dst_vec
, getXMMReg( rE
) );
31865 DIP( "vdppd $%d,%s,%s,%s\n",
31866 imm8
, nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31868 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
31869 imm8
= (Int
)getUChar(delta
+alen
);
31870 assign( dst_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
31872 DIP( "vdppd $%d,%s,%s,%s\n",
31873 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31876 IRTemp src_vec
= newTemp(Ity_V128
);
31877 assign(src_vec
, getXMMReg( rV
));
31878 IRTemp res_vec
= math_DPPD_128( src_vec
, dst_vec
, imm8
);
31879 putYMMRegLoAndZU( rG
, mkexpr(res_vec
) );
31881 goto decode_success
;
31886 /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */
31887 /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */
31888 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31889 UChar modrm
= getUChar(delta
);
31891 IRTemp src_vec
= newTemp(Ity_V128
);
31892 IRTemp dst_vec
= newTemp(Ity_V128
);
31893 UInt rG
= gregOfRexRM(pfx
, modrm
);
31894 UInt rV
= getVexNvvvv(pfx
);
31896 assign( dst_vec
, getXMMReg(rV
) );
31898 if ( epartIsReg( modrm
) ) {
31899 UInt rE
= eregOfRexRM(pfx
, modrm
);
31901 imm8
= (Int
)getUChar(delta
+1);
31902 assign( src_vec
, getXMMReg(rE
) );
31904 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8
,
31905 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31907 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
31908 1/* imm8 is 1 byte after the amode */ );
31909 assign( src_vec
, loadLE( Ity_V128
, mkexpr(addr
) ) );
31910 imm8
= (Int
)getUChar(delta
+alen
);
31912 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8
,
31913 dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31916 putYMMRegLoAndZU( rG
, mkexpr( math_MPSADBW_128(dst_vec
,
31917 src_vec
, imm8
) ) );
31919 goto decode_success
;
31921 /* VMPSADBW imm8, ymm3/m256,ymm2,ymm1 */
31922 /* VMPSADBW = VEX.NDS.256.66.0F3A.WIG 42 /r ib */
31923 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
31924 UChar modrm
= getUChar(delta
);
31926 IRTemp src_vec
= newTemp(Ity_V256
);
31927 IRTemp dst_vec
= newTemp(Ity_V256
);
31928 UInt rG
= gregOfRexRM(pfx
, modrm
);
31929 UInt rV
= getVexNvvvv(pfx
);
31930 IRTemp sHi
, sLo
, dHi
, dLo
;
31931 sHi
= sLo
= dHi
= dLo
= IRTemp_INVALID
;
31933 assign( dst_vec
, getYMMReg(rV
) );
31935 if ( epartIsReg( modrm
) ) {
31936 UInt rE
= eregOfRexRM(pfx
, modrm
);
31938 imm8
= (Int
)getUChar(delta
+1);
31939 assign( src_vec
, getYMMReg(rE
) );
31941 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8
,
31942 nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
) );
31944 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
31945 1/* imm8 is 1 byte after the amode */ );
31946 assign( src_vec
, loadLE( Ity_V256
, mkexpr(addr
) ) );
31947 imm8
= (Int
)getUChar(delta
+alen
);
31949 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8
,
31950 dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
) );
31953 breakupV256toV128s( dst_vec
, &dHi
, &dLo
);
31954 breakupV256toV128s( src_vec
, &sHi
, &sLo
);
31955 putYMMReg( rG
, binop( Iop_V128HLtoV256
,
31956 mkexpr( math_MPSADBW_128(dHi
, sHi
, imm8
>> 3) ),
31957 mkexpr( math_MPSADBW_128(dLo
, sLo
, imm8
) ) ) );
31959 goto decode_success
;
31964 /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */
31965 /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */
31966 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
31967 * Carry-less multiplication of selected XMM quadwords into XMM
31968 * registers (a.k.a multiplication of polynomials over GF(2))
31970 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
31971 UChar modrm
= getUChar(delta
);
31973 IRTemp sV
= newTemp(Ity_V128
);
31974 IRTemp dV
= newTemp(Ity_V128
);
31975 UInt rG
= gregOfRexRM(pfx
, modrm
);
31976 UInt rV
= getVexNvvvv(pfx
);
31978 assign( dV
, getXMMReg(rV
) );
31980 if ( epartIsReg( modrm
) ) {
31981 UInt rE
= eregOfRexRM(pfx
, modrm
);
31982 imm8
= (Int
)getUChar(delta
+1);
31983 assign( sV
, getXMMReg(rE
) );
31985 DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8
,
31986 nameXMMReg(rE
), nameXMMReg(rV
), nameXMMReg(rG
) );
31988 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
,
31989 1/* imm8 is 1 byte after the amode */ );
31990 assign( sV
, loadLE( Ity_V128
, mkexpr(addr
) ) );
31991 imm8
= (Int
)getUChar(delta
+alen
);
31993 DIP( "vpclmulqdq $%d, %s,%s,%s\n",
31994 imm8
, dis_buf
, nameXMMReg(rV
), nameXMMReg(rG
) );
31997 putYMMRegLoAndZU( rG
, mkexpr( math_PCLMULQDQ(dV
, sV
, imm8
) ) );
31999 goto decode_success
;
32004 /* VPERM2I128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 46 /r ib */
32005 if (have66noF2noF3(pfx
)
32006 && 1==getVexL(pfx
)/*256*/ && 0==getRexW(pfx
)/*W0*/) {
32007 UChar modrm
= getUChar(delta
);
32009 UInt rG
= gregOfRexRM(pfx
, modrm
);
32010 UInt rV
= getVexNvvvv(pfx
);
32011 IRTemp s00
= newTemp(Ity_V128
);
32012 IRTemp s01
= newTemp(Ity_V128
);
32013 IRTemp s10
= newTemp(Ity_V128
);
32014 IRTemp s11
= newTemp(Ity_V128
);
32015 assign(s00
, getYMMRegLane128(rV
, 0));
32016 assign(s01
, getYMMRegLane128(rV
, 1));
32017 if (epartIsReg(modrm
)) {
32018 UInt rE
= eregOfRexRM(pfx
, modrm
);
32020 imm8
= getUChar(delta
);
32021 DIP("vperm2i128 $%u,%s,%s,%s\n",
32022 imm8
, nameYMMReg(rE
), nameYMMReg(rV
), nameYMMReg(rG
));
32023 assign(s10
, getYMMRegLane128(rE
, 0));
32024 assign(s11
, getYMMRegLane128(rE
, 1));
32026 addr
= disAMode( &alen
, vbi
, pfx
, delta
, dis_buf
, 1 );
32028 imm8
= getUChar(delta
);
32029 DIP("vperm2i128 $%u,%s,%s,%s\n",
32030 imm8
, dis_buf
, nameYMMReg(rV
), nameYMMReg(rG
));
32031 assign(s10
, loadLE(Ity_V128
, binop(Iop_Add64
,
32032 mkexpr(addr
), mkU64(0))));
32033 assign(s11
, loadLE(Ity_V128
, binop(Iop_Add64
,
32034 mkexpr(addr
), mkU64(16))));
32037 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
32038 : ((_nn)==2) ? s10 : s11)
32039 putYMMRegLane128(rG
, 0, mkexpr(SEL((imm8
>> 0) & 3)));
32040 putYMMRegLane128(rG
, 1, mkexpr(SEL((imm8
>> 4) & 3)));
32042 if (imm8
& (1<<3)) putYMMRegLane128(rG
, 0, mkV128(0));
32043 if (imm8
& (1<<7)) putYMMRegLane128(rG
, 1, mkV128(0));
32045 goto decode_success
;
32050 /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4
32051 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
32052 /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */
32053 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
32054 delta
= dis_VBLENDV_128 ( vbi
, pfx
, delta
,
32055 "vblendvps", 4, Iop_SarN32x4
);
32057 goto decode_success
;
32059 /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4
32060 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
32061 /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */
32062 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
32063 delta
= dis_VBLENDV_256 ( vbi
, pfx
, delta
,
32064 "vblendvps", 4, Iop_SarN32x4
);
32066 goto decode_success
;
32071 /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4
32072 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
32073 /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */
32074 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
32075 delta
= dis_VBLENDV_128 ( vbi
, pfx
, delta
,
32076 "vblendvpd", 8, Iop_SarN64x2
);
32078 goto decode_success
;
32080 /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4
32081 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
32082 /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */
32083 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
32084 delta
= dis_VBLENDV_256 ( vbi
, pfx
, delta
,
32085 "vblendvpd", 8, Iop_SarN64x2
);
32087 goto decode_success
;
32092 /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4
32093 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
32094 /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */
32095 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
32096 delta
= dis_VBLENDV_128 ( vbi
, pfx
, delta
,
32097 "vpblendvb", 1, Iop_SarN8x16
);
32099 goto decode_success
;
32101 /* VPBLENDVB ymmG, ymmE/memE, ymmV, ymmIS4
32102 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
32103 /* VPBLENDVB = VEX.NDS.256.66.0F3A.WIG 4C /r /is4 */
32104 if (have66noF2noF3(pfx
) && 1==getVexL(pfx
)/*256*/) {
32105 delta
= dis_VBLENDV_256 ( vbi
, pfx
, delta
,
32106 "vpblendvb", 1, Iop_SarN8x16
);
32108 goto decode_success
;
32116 /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1
32117 VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1
32118 VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1
32119 VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1
32120 (selected special cases that actually occur in glibc,
32121 not by any means a complete implementation.)
32123 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
32124 Long delta0
= delta
;
32125 delta
= dis_PCMPxSTRx( vbi
, pfx
, delta
, True
/*isAvx*/, opc
);
32126 if (delta
> delta0
) goto decode_success
;
32127 /* else fall though; dis_PCMPxSTRx failed to decode it */
32131 case 0x5C ... 0x5F:
32132 case 0x68 ... 0x6F:
32133 case 0x78 ... 0x7F:
32134 /* FIXME: list the instructions decoded here */
32135 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
32136 Long delta0
= delta
;
32137 delta
= dis_FMA4( pfx
, delta
, opc
, uses_vvvv
, vbi
);
32138 if (delta
> delta0
) {
32139 dres
->hint
= Dis_HintVerbose
;
32140 goto decode_success
;
32142 /* else fall though; dis_FMA4 failed to decode it */
32147 /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r */
32148 if (have66noF2noF3(pfx
) && 0==getVexL(pfx
)/*128*/) {
32149 delta
= dis_AESKEYGENASSIST( vbi
, pfx
, delta
, True
/*!isAvx*/ );
32150 goto decode_success
;
32155 /* RORX imm8, r/m32, r32a = VEX.LZ.F2.0F3A.W0 F0 /r /i */
32156 /* RORX imm8, r/m64, r64a = VEX.LZ.F2.0F3A.W1 F0 /r /i */
32157 if (haveF2no66noF3(pfx
) && 0==getVexL(pfx
)/*LZ*/ && !haveREX(pfx
)) {
32158 Int size
= getRexW(pfx
) ? 8 : 4;
32159 IRType ty
= szToITy(size
);
32160 IRTemp src
= newTemp(ty
);
32161 UChar rm
= getUChar(delta
);
32164 if (epartIsReg(rm
)) {
32165 imm8
= getUChar(delta
+1);
32166 assign( src
, getIRegE(size
,pfx
,rm
) );
32167 DIP("rorx %d,%s,%s\n", imm8
, nameIRegE(size
,pfx
,rm
),
32168 nameIRegG(size
,pfx
,rm
));
32171 addr
= disAMode ( &alen
, vbi
, pfx
, delta
, dis_buf
, 0 );
32172 imm8
= getUChar(delta
+alen
);
32173 assign( src
, loadLE(ty
, mkexpr(addr
)) );
32174 DIP("rorx %d,%s,%s\n", imm8
, dis_buf
, nameIRegG(size
,pfx
,rm
));
32179 /* dst = (src >>u imm8) | (src << (size-imm8)) */
32180 putIRegG( size
, pfx
, rm
,
32181 imm8
== 0 ? mkexpr(src
)
32182 : binop( mkSizedOp(ty
,Iop_Or8
),
32183 binop( mkSizedOp(ty
,Iop_Shr8
), mkexpr(src
),
32185 binop( mkSizedOp(ty
,Iop_Shl8
), mkexpr(src
),
32186 mkU8(8*size
-imm8
) ) ) );
32187 /* Flags aren't modified. */
32188 goto decode_success
;
32205 /*------------------------------------------------------------*/
32207 /*--- Disassemble a single instruction ---*/
32209 /*------------------------------------------------------------*/
32211 /* Disassemble a single instruction into IR. The instruction is
32212 located in host memory at &guest_code[delta]. */
32215 DisResult
disInstr_AMD64_WRK (
32216 /*OUT*/Bool
* expect_CAS
,
32218 const VexArchInfo
* archinfo
,
32219 const VexAbiInfo
* vbi
,
32228 /* The running delta */
32229 Long delta
= delta64
;
32231 /* Holds eip at the start of the insn, so that we can print
32232 consistent error messages for unimplemented insns. */
32233 Long delta_start
= delta
;
32235 /* sz denotes the nominal data-op size of the insn; we change it to
32236 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of
32237 conflict REX.W takes precedence. */
32240 /* pfx holds the summary of prefixes. */
32241 Prefix pfx
= PFX_EMPTY
;
32243 /* Holds the computed opcode-escape indication. */
32244 Escape esc
= ESC_NONE
;
32246 /* Set result defaults. */
32247 dres
.whatNext
= Dis_Continue
;
32249 dres
.jk_StopHere
= Ijk_INVALID
;
32250 dres
.hint
= Dis_HintNone
;
32251 *expect_CAS
= False
;
32253 vassert(guest_RIP_next_assumed
== 0);
32254 vassert(guest_RIP_next_mustcheck
== False
);
32256 t1
= t2
= IRTemp_INVALID
;
32258 DIP("\t0x%llx: ", guest_RIP_bbstart
+delta
);
32260 /* Spot "Special" instructions (see comment at top of file). */
32262 const UChar
* code
= guest_code
+ delta
;
32263 /* Spot the 16-byte preamble:
32264 48C1C703 rolq $3, %rdi
32265 48C1C70D rolq $13, %rdi
32266 48C1C73D rolq $61, %rdi
32267 48C1C733 rolq $51, %rdi
32269 if (code
[ 0] == 0x48 && code
[ 1] == 0xC1 && code
[ 2] == 0xC7
32270 && code
[ 3] == 0x03 &&
32271 code
[ 4] == 0x48 && code
[ 5] == 0xC1 && code
[ 6] == 0xC7
32272 && code
[ 7] == 0x0D &&
32273 code
[ 8] == 0x48 && code
[ 9] == 0xC1 && code
[10] == 0xC7
32274 && code
[11] == 0x3D &&
32275 code
[12] == 0x48 && code
[13] == 0xC1 && code
[14] == 0xC7
32276 && code
[15] == 0x33) {
32277 /* Got a "Special" instruction preamble. Which one is it? */
32278 if (code
[16] == 0x48 && code
[17] == 0x87
32279 && code
[18] == 0xDB /* xchgq %rbx,%rbx */) {
32280 /* %RDX = client_request ( %RAX ) */
32281 DIP("%%rdx = client_request ( %%rax )\n");
32283 jmp_lit(&dres
, Ijk_ClientReq
, guest_RIP_bbstart
+delta
);
32284 vassert(dres
.whatNext
== Dis_StopHere
);
32285 goto decode_success
;
32288 if (code
[16] == 0x48 && code
[17] == 0x87
32289 && code
[18] == 0xC9 /* xchgq %rcx,%rcx */) {
32290 /* %RAX = guest_NRADDR */
32291 DIP("%%rax = guest_NRADDR\n");
32293 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR
, Ity_I64
));
32294 goto decode_success
;
32297 if (code
[16] == 0x48 && code
[17] == 0x87
32298 && code
[18] == 0xD2 /* xchgq %rdx,%rdx */) {
32299 /* call-noredir *%RAX */
32300 DIP("call-noredir *%%rax\n");
32302 t1
= newTemp(Ity_I64
);
32303 assign(t1
, getIRegRAX(8));
32304 t2
= newTemp(Ity_I64
);
32305 assign(t2
, binop(Iop_Sub64
, getIReg64(R_RSP
), mkU64(8)));
32306 putIReg64(R_RSP
, mkexpr(t2
));
32307 storeLE( mkexpr(t2
), mkU64(guest_RIP_bbstart
+delta
));
32308 jmp_treg(&dres
, Ijk_NoRedir
, t1
);
32309 vassert(dres
.whatNext
== Dis_StopHere
);
32310 goto decode_success
;
32313 if (code
[16] == 0x48 && code
[17] == 0x87
32314 && code
[18] == 0xff /* xchgq %rdi,%rdi */) {
32316 DIP("IR injection\n");
32317 vex_inject_ir(irsb
, Iend_LE
);
32319 // Invalidate the current insn. The reason is that the IRop we're
32320 // injecting here can change. In which case the translation has to
32321 // be redone. For ease of handling, we simply invalidate all the
32323 stmt(IRStmt_Put(OFFB_CMSTART
, mkU64(guest_RIP_curr_instr
)));
32324 stmt(IRStmt_Put(OFFB_CMLEN
, mkU64(19)));
32328 stmt( IRStmt_Put( OFFB_RIP
, mkU64(guest_RIP_bbstart
+ delta
) ) );
32329 dres
.whatNext
= Dis_StopHere
;
32330 dres
.jk_StopHere
= Ijk_InvalICache
;
32331 goto decode_success
;
32333 /* We don't know what it is. */
32334 goto decode_failure
;
32339 /* Eat prefixes, summarising the result in pfx and sz, and rejecting
32340 as many invalid combinations as possible. */
32343 if (n_prefixes
> 7) goto decode_failure
;
32344 pre
= getUChar(delta
);
32346 case 0x66: pfx
|= PFX_66
; break;
32347 case 0x67: pfx
|= PFX_ASO
; break;
32348 case 0xF2: pfx
|= PFX_F2
; break;
32349 case 0xF3: pfx
|= PFX_F3
; break;
32350 case 0xF0: pfx
|= PFX_LOCK
; *expect_CAS
= True
; break;
32351 case 0x2E: pfx
|= PFX_CS
; break;
32352 case 0x3E: pfx
|= PFX_DS
; break;
32353 case 0x26: pfx
|= PFX_ES
; break;
32354 case 0x64: pfx
|= PFX_FS
; break;
32355 case 0x65: pfx
|= PFX_GS
; break;
32356 case 0x36: pfx
|= PFX_SS
; break;
32357 case 0x40 ... 0x4F:
32359 if (pre
& (1<<3)) pfx
|= PFX_REXW
;
32360 if (pre
& (1<<2)) pfx
|= PFX_REXR
;
32361 if (pre
& (1<<1)) pfx
|= PFX_REXX
;
32362 if (pre
& (1<<0)) pfx
|= PFX_REXB
;
32365 goto not_a_legacy_prefix
;
32371 not_a_legacy_prefix
:
32372 /* We've used up all the non-VEX prefixes. Parse and validate a
32373 VEX prefix if that's appropriate. */
32374 if (archinfo
->hwcaps
& VEX_HWCAPS_AMD64_AVX
) {
32375 /* Used temporarily for holding VEX prefixes. */
32376 UChar vex0
= getUChar(delta
);
32377 if (vex0
== 0xC4) {
32379 UChar vex1
= getUChar(delta
+1);
32380 UChar vex2
= getUChar(delta
+2);
32383 /* Snarf contents of byte 1 */
32384 /* R */ pfx
|= (vex1
& (1<<7)) ? 0 : PFX_REXR
;
32385 /* X */ pfx
|= (vex1
& (1<<6)) ? 0 : PFX_REXX
;
32386 /* B */ pfx
|= (vex1
& (1<<5)) ? 0 : PFX_REXB
;
32388 switch (vex1
& 0x1F) {
32389 case 1: esc
= ESC_0F
; break;
32390 case 2: esc
= ESC_0F38
; break;
32391 case 3: esc
= ESC_0F3A
; break;
32392 /* Any other m-mmmm field will #UD */
32393 default: goto decode_failure
;
32395 /* Snarf contents of byte 2 */
32396 /* W */ pfx
|= (vex2
& (1<<7)) ? PFX_REXW
: 0;
32397 /* ~v3 */ pfx
|= (vex2
& (1<<6)) ? 0 : PFX_VEXnV3
;
32398 /* ~v2 */ pfx
|= (vex2
& (1<<5)) ? 0 : PFX_VEXnV2
;
32399 /* ~v1 */ pfx
|= (vex2
& (1<<4)) ? 0 : PFX_VEXnV1
;
32400 /* ~v0 */ pfx
|= (vex2
& (1<<3)) ? 0 : PFX_VEXnV0
;
32401 /* L */ pfx
|= (vex2
& (1<<2)) ? PFX_VEXL
: 0;
32403 switch (vex2
& 3) {
32405 case 1: pfx
|= PFX_66
; break;
32406 case 2: pfx
|= PFX_F3
; break;
32407 case 3: pfx
|= PFX_F2
; break;
32408 default: vassert(0);
32411 else if (vex0
== 0xC5) {
32413 UChar vex1
= getUChar(delta
+1);
32416 /* Snarf contents of byte 1 */
32417 /* R */ pfx
|= (vex1
& (1<<7)) ? 0 : PFX_REXR
;
32418 /* ~v3 */ pfx
|= (vex1
& (1<<6)) ? 0 : PFX_VEXnV3
;
32419 /* ~v2 */ pfx
|= (vex1
& (1<<5)) ? 0 : PFX_VEXnV2
;
32420 /* ~v1 */ pfx
|= (vex1
& (1<<4)) ? 0 : PFX_VEXnV1
;
32421 /* ~v0 */ pfx
|= (vex1
& (1<<3)) ? 0 : PFX_VEXnV0
;
32422 /* L */ pfx
|= (vex1
& (1<<2)) ? PFX_VEXL
: 0;
32424 switch (vex1
& 3) {
32426 case 1: pfx
|= PFX_66
; break;
32427 case 2: pfx
|= PFX_F3
; break;
32428 case 3: pfx
|= PFX_F2
; break;
32429 default: vassert(0);
32434 /* Can't have both VEX and REX */
32435 if ((pfx
& PFX_VEX
) && (pfx
& PFX_REX
))
32436 goto decode_failure
; /* can't have both */
32439 /* Dump invalid combinations */
32441 if (pfx
& PFX_F2
) n
++;
32442 if (pfx
& PFX_F3
) n
++;
32444 goto decode_failure
; /* can't have both */
32447 if (pfx
& PFX_CS
) n
++;
32448 if (pfx
& PFX_DS
) n
++;
32449 if (pfx
& PFX_ES
) n
++;
32450 if (pfx
& PFX_FS
) n
++;
32451 if (pfx
& PFX_GS
) n
++;
32452 if (pfx
& PFX_SS
) n
++;
32454 goto decode_failure
; /* multiple seg overrides == illegal */
32456 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi'
32457 that we should accept it. */
32458 if ((pfx
& PFX_FS
) && !vbi
->guest_amd64_assume_fs_is_const
)
32459 goto decode_failure
;
32461 /* Ditto for %gs prefixes. */
32462 if ((pfx
& PFX_GS
) && !vbi
->guest_amd64_assume_gs_is_const
)
32463 goto decode_failure
;
32467 if (pfx
& PFX_66
) sz
= 2;
32468 if ((pfx
& PFX_REX
) && (pfx
& PFX_REXW
)) sz
= 8;
32470 /* Now we should be looking at the primary opcode byte or the
32471 leading escapes. Check that any LOCK prefix is actually
32473 if (haveLOCK(pfx
)) {
32474 if (can_be_used_with_LOCK_prefix( &guest_code
[delta
] )) {
32477 *expect_CAS
= False
;
32478 goto decode_failure
;
32482 /* Eat up opcode escape bytes, until we're really looking at the
32483 primary opcode byte. But only if there's no VEX present. */
32484 if (!(pfx
& PFX_VEX
)) {
32485 vassert(esc
== ESC_NONE
);
32486 pre
= getUChar(delta
);
32489 pre
= getUChar(delta
);
32491 case 0x38: esc
= ESC_0F38
; delta
++; break;
32492 case 0x3A: esc
= ESC_0F3A
; delta
++; break;
32493 default: esc
= ESC_0F
; break;
32498 /* So now we're really really looking at the primary opcode
32500 Long delta_at_primary_opcode
= delta
;
32502 if (!(pfx
& PFX_VEX
)) {
32503 /* Handle non-VEX prefixed instructions. "Legacy" (non-VEX) SSE
32504 instructions preserve the upper 128 bits of YMM registers;
32505 iow we can simply ignore the presence of the upper halves of
32506 these registers. */
32509 delta
= dis_ESC_NONE( &dres
, expect_CAS
,
32510 archinfo
, vbi
, pfx
, sz
, delta
);
32513 delta
= dis_ESC_0F ( &dres
, expect_CAS
,
32514 archinfo
, vbi
, pfx
, sz
, delta
);
32517 delta
= dis_ESC_0F38( &dres
,
32518 archinfo
, vbi
, pfx
, sz
, delta
);
32521 delta
= dis_ESC_0F3A( &dres
,
32522 archinfo
, vbi
, pfx
, sz
, delta
);
32528 /* VEX prefixed instruction */
32529 /* Sloppy Intel wording: "An instruction encoded with a VEX.128
32530 prefix that loads a YMM register operand ..." zeroes out bits
32531 128 and above of the register. */
32532 Bool uses_vvvv
= False
;
32535 delta
= dis_ESC_0F__VEX ( &dres
, &uses_vvvv
,
32536 archinfo
, vbi
, pfx
, sz
, delta
);
32539 delta
= dis_ESC_0F38__VEX ( &dres
, &uses_vvvv
,
32540 archinfo
, vbi
, pfx
, sz
, delta
);
32543 delta
= dis_ESC_0F3A__VEX ( &dres
, &uses_vvvv
,
32544 archinfo
, vbi
, pfx
, sz
, delta
);
32547 /* The presence of a VEX prefix, by Intel definition,
32548 always implies at least an 0F escape. */
32549 goto decode_failure
;
32553 /* If the insn doesn't use VEX.vvvv then it must be all ones.
32556 if (getVexNvvvv(pfx
) != 0)
32557 goto decode_failure
;
32561 vassert(delta
- delta_at_primary_opcode
>= 0);
32562 vassert(delta
- delta_at_primary_opcode
< 16/*let's say*/);
32564 /* Use delta == delta_at_primary_opcode to denote decode failure.
32565 This implies that any successful decode must use at least one
32567 if (delta
== delta_at_primary_opcode
)
32568 goto decode_failure
;
32570 goto decode_success
; /* \o/ */
32574 /* All decode failures end up here. */
32576 vex_printf("vex amd64->IR: unhandled instruction bytes: "
32577 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
32578 getUChar(delta_start
+0),
32579 getUChar(delta_start
+1),
32580 getUChar(delta_start
+2),
32581 getUChar(delta_start
+3),
32582 getUChar(delta_start
+4),
32583 getUChar(delta_start
+5),
32584 getUChar(delta_start
+6),
32585 getUChar(delta_start
+7),
32586 getUChar(delta_start
+8),
32587 getUChar(delta_start
+9) );
32588 vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n",
32589 haveREX(pfx
) ? 1 : 0, getRexW(pfx
), getRexR(pfx
),
32590 getRexX(pfx
), getRexB(pfx
));
32591 vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n",
32592 haveVEX(pfx
) ? 1 : 0, getVexL(pfx
),
32594 esc
==ESC_NONE
? "NONE" :
32595 esc
==ESC_0F
? "0F" :
32596 esc
==ESC_0F38
? "0F38" :
32597 esc
==ESC_0F3A
? "0F3A" : "???");
32598 vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n",
32599 have66(pfx
) ? 1 : 0, haveF2(pfx
) ? 1 : 0,
32600 haveF3(pfx
) ? 1 : 0);
32603 /* Tell the dispatcher that this insn cannot be decoded, and so has
32604 not been executed, and (is currently) the next to be executed.
32605 RIP should be up-to-date since it made so at the start of each
32606 insn, but nevertheless be paranoid and update it again right
32608 stmt( IRStmt_Put( OFFB_RIP
, mkU64(guest_RIP_curr_instr
) ) );
32609 jmp_lit(&dres
, Ijk_NoDecode
, guest_RIP_curr_instr
);
32610 vassert(dres
.whatNext
== Dis_StopHere
);
32612 /* We also need to say that a CAS is not expected now, regardless
32613 of what it might have been set to at the start of the function,
32614 since the IR that we've emitted just above (to synthesis a
32615 SIGILL) does not involve any CAS, and presumably no other IR has
32616 been emitted for this (non-decoded) insn. */
32617 *expect_CAS
= False
;
32622 /* All decode successes end up here. */
32623 switch (dres
.whatNext
) {
32625 stmt( IRStmt_Put( OFFB_RIP
, mkU64(guest_RIP_bbstart
+ delta
) ) );
32634 dres
.len
= toUInt(delta
- delta_start
);
32642 /*------------------------------------------------------------*/
32643 /*--- Top-level fn ---*/
32644 /*------------------------------------------------------------*/
32646 /* Disassemble a single instruction into IR. The instruction
32647 is located in host memory at &guest_code[delta]. */
32649 DisResult
disInstr_AMD64 ( IRSB
* irsb_IN
,
32650 const UChar
* guest_code_IN
,
32653 VexArch guest_arch
,
32654 const VexArchInfo
* archinfo
,
32655 const VexAbiInfo
* abiinfo
,
32656 VexEndness host_endness_IN
,
32657 Bool sigill_diag_IN
)
32660 Bool expect_CAS
, has_CAS
;
32663 /* Set globals (see top of this file) */
32664 vassert(guest_arch
== VexArchAMD64
);
32665 guest_code
= guest_code_IN
;
32667 host_endness
= host_endness_IN
;
32668 guest_RIP_curr_instr
= guest_IP
;
32669 guest_RIP_bbstart
= guest_IP
- delta
;
32671 /* We'll consult these after doing disInstr_AMD64_WRK. */
32672 guest_RIP_next_assumed
= 0;
32673 guest_RIP_next_mustcheck
= False
;
32675 x1
= irsb_IN
->stmts_used
;
32676 expect_CAS
= False
;
32677 dres
= disInstr_AMD64_WRK ( &expect_CAS
,
32678 delta
, archinfo
, abiinfo
, sigill_diag_IN
);
32679 x2
= irsb_IN
->stmts_used
;
32682 /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
32683 got it right. Failure of this assertion is serious and denotes
32684 a bug in disInstr. */
32685 if (guest_RIP_next_mustcheck
32686 && guest_RIP_next_assumed
!= guest_RIP_curr_instr
+ dres
.len
) {
32688 vex_printf("assumed next %%rip = 0x%llx\n",
32689 guest_RIP_next_assumed
);
32690 vex_printf(" actual next %%rip = 0x%llx\n",
32691 guest_RIP_curr_instr
+ dres
.len
);
32692 vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
32695 /* See comment at the top of disInstr_AMD64_WRK for meaning of
32696 expect_CAS. Here, we (sanity-)check for the presence/absence of
32697 IRCAS as directed by the returned expect_CAS value. */
32699 for (i
= x1
; i
< x2
; i
++) {
32700 if (irsb_IN
->stmts
[i
]->tag
== Ist_CAS
)
32704 if (expect_CAS
!= has_CAS
) {
32705 /* inconsistency detected. re-disassemble the instruction so as
32706 to generate a useful error message; then assert. */
32707 vex_traceflags
|= VEX_TRACE_FE
;
32708 dres
= disInstr_AMD64_WRK ( &expect_CAS
,
32709 delta
, archinfo
, abiinfo
, sigill_diag_IN
);
32710 for (i
= x1
; i
< x2
; i
++) {
32711 vex_printf("\t\t");
32712 ppIRStmt(irsb_IN
->stmts
[i
]);
32715 /* Failure of this assertion is serious and denotes a bug in
32717 vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
32724 /*------------------------------------------------------------*/
32725 /*--- Unused stuff ---*/
32726 /*------------------------------------------------------------*/
32728 // A potentially more Memcheck-friendly version of gen_LZCNT, if
32729 // this should ever be needed.
32731 //static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
32733 // /* Scheme is simple: propagate the most significant 1-bit into all
32734 // lower positions in the word. This gives a word of the form
32735 // 0---01---1. Now invert it, giving a word of the form
32736 // 1---10---0, then do a population-count idiom (to count the 1s,
32737 // which is the number of leading zeroes, or the word size if the
32738 //      original word was 0.)
32742 // for (i = 0; i < 7; i++) {
32743 // t[i] = newTemp(ty);
32745 // if (ty == Ity_I64) {
32746 // assign(t[0], binop(Iop_Or64, mkexpr(src),
32747 // binop(Iop_Shr64, mkexpr(src), mkU8(1))));
32748 // assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
32749 // binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
32750 // assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
32751 // binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
32752 // assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
32753 // binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
32754 // assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
32755 // binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
32756 // assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
32757 // binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
32758 // assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
32759 // return gen_POPCOUNT(ty, t[6]);
32761 // if (ty == Ity_I32) {
32762 // assign(t[0], binop(Iop_Or32, mkexpr(src),
32763 // binop(Iop_Shr32, mkexpr(src), mkU8(1))));
32764 // assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
32765 // binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
32766 // assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
32767 // binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
32768 // assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
32769 // binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
32770 // assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
32771 // binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
32772 // assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
32773 // return gen_POPCOUNT(ty, t[5]);
32775 // if (ty == Ity_I16) {
32776 // assign(t[0], binop(Iop_Or16, mkexpr(src),
32777 // binop(Iop_Shr16, mkexpr(src), mkU8(1))));
32778 // assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
32779 // binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
32780 // assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
32781 // binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
32782 // assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
32783 // binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
32784 // assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
32785 // return gen_POPCOUNT(ty, t[4]);
32791 /*--------------------------------------------------------------------*/
32792 /*--- end guest_amd64_toIR.c ---*/
32793 /*--------------------------------------------------------------------*/